bandersnatch package

Package contents

Submodules

bandersnatch.configuration module

Module containing classes to access the bandersnatch configuration file

class bandersnatch.configuration.BandersnatchConfig(*args: Any, **kwargs: Any)[source]

Bases: object

SHOWN_DEPRECATIONS = False
check_for_deprecations() None[source]
load_configuration() None[source]

Read the configuration from a configuration file
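A minimal usage sketch. It assumes, as in recent bandersnatch releases, that BandersnatchConfig exposes the parsed file on a config attribute (a ConfigParser); the option names below are the usual [mirror] section keys and the fallback path is hypothetical.

    from bandersnatch.configuration import BandersnatchConfig

    # Singleton metaclass: later constructions return this same instance.
    instance = BandersnatchConfig()
    instance.load_configuration()
    directory = instance.config.get("mirror", "directory", fallback="/srv/pypi")
    print(directory)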

class bandersnatch.configuration.SetConfigValues(json_save, root_uri, diff_file_path, diff_append_epoch, digest_name, storage_backend_name, cleanup, release_files_save, compare_method, download_mirror, download_mirror_no_fallback, simple_format)[source]

Bases: NamedTuple

cleanup: bool

Alias for field number 6

compare_method: str

Alias for field number 8

diff_append_epoch: bool

Alias for field number 3

diff_file_path: str

Alias for field number 2

digest_name: str

Alias for field number 4

download_mirror: str

Alias for field number 9

download_mirror_no_fallback: bool

Alias for field number 10

json_save: bool

Alias for field number 0

release_files_save: bool

Alias for field number 7

root_uri: str

Alias for field number 1

simple_format: SimpleFormat

Alias for field number 11

storage_backend_name: str

Alias for field number 5

class bandersnatch.configuration.Singleton[source]

Bases: type

bandersnatch.configuration.validate_config_values(config: ConfigParser) SetConfigValues[source]
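A hedged sketch of validating a parsed config into the SetConfigValues NamedTuple above. The config file path is an assumption; depending on the option, missing values either fall back to bandersnatch defaults or raise.

    from configparser import ConfigParser

    from bandersnatch.configuration import validate_config_values

    parser = ConfigParser()
    parser.read("/etc/bandersnatch.conf")  # hypothetical path
    values = validate_config_values(parser)
    print(values.storage_backend_name, values.simple_format)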

bandersnatch.delete module

async bandersnatch.delete.delete_packages(config: ConfigParser, args: Namespace, master: Master) int[source]
async bandersnatch.delete.delete_path(blob_path: Path, dry_run: bool = False) int[source]
async bandersnatch.delete.delete_simple_page(simple_base_path: Path, package: str, hash_index: bool = False, dry_run: bool = True) int[source]
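An illustrative dry run of delete_path; the blob path below is hypothetical.

    import asyncio
    from pathlib import Path

    from bandersnatch.delete import delete_path

    async def main() -> int:
        blob = Path("/srv/pypi/web/packages/ab/cd/example-1.0.tar.gz")
        # dry_run=True logs what would be removed instead of deleting it.
        return await delete_path(blob, dry_run=True)

    asyncio.run(main())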

bandersnatch.filter module

Blocklist management

class bandersnatch.filter.Filter(*args: Any, **kwargs: Any)[source]

Bases: object

Base Filter class

property allowlist: SectionProxy
property blocklist: SectionProxy
check_match(**kwargs: Any) bool[source]

Check if the plugin matches based on the arguments provided.

Returns:

True if the values match a filter rule, False otherwise

Return type:

bool

deprecated_name: str = ''
filter(metadata: dict) bool[source]

Check if the plugin matches based on the package’s metadata.

Returns:

True if the values match a filter rule, False otherwise

Return type:

bool

initialize_plugin() None[source]

Code to initialize the plugin

name = 'filter'
pinned_version_exists(metadata: dict) bool[source]

Check if a version specifier exists.

Returns:

True if a version specifier exists, False otherwise

Return type:

bool
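A hedged sketch of a custom filter built on the Filter base class documented above. The plugin name, the option handling, and the convention that returning False from filter() excludes a project are assumptions modeled on the built-in plugins (real project plugins usually derive from FilterProjectPlugin, documented below).

    from bandersnatch.filter import Filter

    class BlockExamplePlugin(Filter):
        name = "block_example_plugin"  # hypothetical entry point name

        def initialize_plugin(self) -> None:
            # Real plugins typically read their options from the
            # bandersnatch configuration here; this set is illustrative.
            self.blocked = {"example-package"}

        def filter(self, metadata: dict) -> bool:
            # Assumption: False -> project is filtered out; True -> keep it.
            return metadata["info"]["name"] not in self.blocked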

class bandersnatch.filter.FilterMetadataPlugin(*args: Any, **kwargs: Any)[source]

Bases: Filter

Plugin that blocks sync operations for an entire project based on info fields.

name = 'metadata_plugin'
class bandersnatch.filter.FilterProjectPlugin(*args: Any, **kwargs: Any)[source]

Bases: Filter

Plugin that blocks sync operations for an entire project

name = 'project_plugin'
class bandersnatch.filter.FilterReleaseFilePlugin(*args: Any, **kwargs: Any)[source]

Bases: Filter

Plugin that modifies the download of specific release or dist files

name = 'release_file_plugin'
class bandersnatch.filter.FilterReleasePlugin(*args: Any, **kwargs: Any)[source]

Bases: Filter

Plugin that modifies the download of specific releases or dist files

name = 'release_plugin'
class bandersnatch.filter.LoadedFilters(load_all: bool = False)[source]

Bases: object

A class to load all of the enabled filters

ENTRYPOINT_GROUPS = ['bandersnatch_filter_plugins.v2.project', 'bandersnatch_filter_plugins.v2.metadata', 'bandersnatch_filter_plugins.v2.release', 'bandersnatch_filter_plugins.v2.release_file']
filter_metadata_plugins() list[Filter][source]

Load and return the metadata filtering plugin objects

Returns:

List of objects derived from the bandersnatch.filter.Filter class

Return type:

list of bandersnatch.filter.Filter

filter_project_plugins() list[Filter][source]

Load and return the project filtering plugin objects

Returns:

List of objects derived from the bandersnatch.filter.Filter class

Return type:

list of bandersnatch.filter.Filter

filter_release_file_plugins() list[Filter][source]

Load and return the release file filtering plugin objects

Returns:

List of objects derived from the bandersnatch.filter.Filter class

Return type:

list of bandersnatch.filter.Filter

filter_release_plugins() list[Filter][source]

Load and return the release filtering plugin objects

Returns:

List of objects derived from the bandersnatch.filter.Filter class

Return type:

list of bandersnatch.filter.Filter
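Sketch: enumerate the loaded release filters. Passing load_all=True loads every installed plugin regardless of what the configuration enables.

    from bandersnatch.filter import LoadedFilters

    filters = LoadedFilters(load_all=True)
    for plugin in filters.filter_release_plugins():
        print(plugin.name)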

bandersnatch.log module

bandersnatch.log.setup_logging(args: Any) StreamHandler[source]

bandersnatch.main module

async bandersnatch.main.async_main(args: Namespace, config: ConfigParser) int[source]
bandersnatch.main.main(loop: AbstractEventLoop | None = None) int[source]

bandersnatch.master module

class bandersnatch.master.Master(url: str, timeout: float = 10.0, global_timeout: float | None = 18000.0, proxy: str | None = None)[source]

Bases: object

async all_packages() Any[source]
async changed_packages(last_serial: int) dict[str, int][source]
async check_for_stale_cache(path: str, required_serial: int | None, got_serial: int | None) None[source]
async get(path: str, required_serial: int | None, **kw: Any) AsyncGenerator[ClientResponse, None][source]
async get_package_metadata(package_name: str, serial: int = 0) Any[source]
async rpc(method_name: str, serial: int = 0) Any[source]
async url_fetch(url: str, file_path: Path, executor: ProcessPoolExecutor | ThreadPoolExecutor | None = None, chunk_size: int = 65536) None[source]
property xmlrpc_url: str
exception bandersnatch.master.StalePage[source]

Bases: Exception

We got a page back from PyPI that doesn’t meet our expected serial.

exception bandersnatch.master.XmlRpcError[source]

Bases: ClientError

Issue getting package listing from PyPI Repository
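A hedged sketch of fetching one package's metadata. It assumes Master works as an async context manager (managing its aiohttp session), as in current releases.

    import asyncio

    from bandersnatch.master import Master

    async def main() -> None:
        async with Master("https://pypi.org") as master:
            metadata = await master.get_package_metadata("requests")
            print(metadata["info"]["version"])

    asyncio.run(main())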

bandersnatch.mirror module

class bandersnatch.mirror.BandersnatchMirror(homedir: Path, master: Master, storage_backend: str | None = None, stop_on_error: bool = False, workers: int = 3, hash_index: bool = False, json_save: bool = False, digest_name: str | None = None, root_uri: str | None = None, keep_index_versions: int = 0, diff_file: Path | str | None = None, diff_append_epoch: bool = False, diff_full_path: Path | str | None = None, flock_timeout: int = 1, diff_file_list: list[Path] | None = None, *, cleanup: bool = False, release_files_save: bool = True, compare_method: str | None = None, download_mirror: str | None = None, download_mirror_no_fallback: bool | None = False, simple_format: SimpleFormat | str = 'ALL')[source]

Bases: Mirror

async cleanup_non_pep_503_paths(package: Package) None[source]

Before 4.0 we used to store backwards-compatible named dirs for older pip. This function checks for them and cleans them up.

async determine_packages_to_sync() None[source]

Update the self.packages_to_sync to contain packages that need to be synced.

async download_file(url: str, file_size: str, upload_time: datetime, sha256sum: str, chunk_size: int = 65536, urlpath: str = '') Path | None[source]
errors = False
finalize_sync(sync_index_page: bool = True) None[source]
find_target_serial() int[source]
property generationfile: Path
json_file(package_name: str) Path[source]
need_index_sync = True
need_wrapup = False
on_error(exception: BaseException, **kwargs: dict) None[source]
populate_download_urls(release_file: dict[str, str]) tuple[str, list[str]][source]

Populate download URLs for a certain file, possible combinations are:

  • download_mirror is not set: return “url” attribute from release_file

  • download_mirror is set, no_fallback is false: prepend “download_mirror + path” before “url”

  • download_mirror is set, no_fallback is true: return only “download_mirror + path”

Theoretically we could support multiple download mirrors by prepending more URLs to the list; the rules above are sketched in code below.
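A re-implementation of the three rules above, for illustration only; the real method reads the download_mirror settings from the mirror instance rather than taking them as arguments.

    from urllib.parse import urlparse

    def sketch_populate(url: str, download_mirror: str | None, no_fallback: bool) -> list[str]:
        # download_mirror unset: just the upstream URL.
        if not download_mirror:
            return [url]
        # Otherwise prepend download_mirror + path, keeping the upstream
        # URL as a fallback unless no_fallback is set.
        mirrored = download_mirror + urlparse(url).path
        return [mirrored] if no_fallback else [mirrored, url]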

async process_package(package: Package) None[source]
record_finished_package(name: str) None[source]
save_json_metadata(package_info: dict, name: str) bool[source]

Take the JSON metadata we just fetched and save to disk

simple_directory(package: Package) Path[source]
property statusfile: Path
async sync_release_files(package: Package) None[source]

Purge and download files, returning the files removed and added

sync_simple_pages(package: Package) None[source]
property todolist: Path
property webdir: Path
wrapup_successful_sync() None[source]
write_simple_pages(package: Package, content: SimpleFormats) None[source]
class bandersnatch.mirror.Mirror(master: Master, workers: int = 3)[source]

Bases: object

async determine_packages_to_sync() None[source]

Update the self.packages_to_sync to contain packages that need to be synced.

finalize_sync(sync_index_page: bool = True) None[source]
now = None
on_error(exception: BaseException, **kwargs: dict) None[source]
async package_syncer(idx: int) None[source]
packages_to_sync: dict[str, int | str] = {}
async process_package(package: Package) None[source]
async sync_packages() None[source]
synced_serial: int | None = 0
async synchronize(specific_packages: list[str] | None = None, sync_simple_index: bool = True) dict[str, set[str]][source]
target_serial: int | None = None
async bandersnatch.mirror.mirror(config: ConfigParser, specific_packages: list[str] | None = None, sync_simple_index: bool = True) int[source]
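Sketch: run a one-shot sync programmatically; the bandersnatch CLI's mirror subcommand wraps this coroutine. The config path is hypothetical.

    import asyncio
    from configparser import ConfigParser

    from bandersnatch.mirror import mirror

    async def main() -> int:
        config = ConfigParser()
        config.read("/etc/bandersnatch.conf")  # hypothetical location
        return await mirror(config)

    asyncio.run(main())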

bandersnatch.package module

class bandersnatch.package.Package(name: str, serial: int = 0)[source]

Bases: object

filter_all_releases(release_filters: list[Filter]) bool[source]

Filter releases and remove those that fail the filters

filter_all_releases_files(release_file_filters: list[Filter]) bool[source]

Filter release files and remove empty releases after doing so.

filter_metadata(metadata_filters: list[Filter]) bool[source]

Run the metadata filtering plugins

property info: Any
property last_serial: int
property metadata: dict[str, Any]
property release_files: list
property releases: Any
async update_metadata(master: Master, attempts: int = 3) None[source]
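Sketch: fetch one package's metadata and apply the loaded release filters to it. Assumes Master works as an async context manager; the package name is illustrative.

    import asyncio

    from bandersnatch.filter import LoadedFilters
    from bandersnatch.master import Master
    from bandersnatch.package import Package

    async def main() -> None:
        async with Master("https://pypi.org") as master:
            package = Package("requests", serial=0)
            await package.update_metadata(master, attempts=3)
        plugins = LoadedFilters(load_all=True).filter_release_plugins()
        changed = package.filter_all_releases(plugins)
        print(changed, len(package.releases))

    asyncio.run(main())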

bandersnatch.storage module

Storage management

class bandersnatch.storage.Storage(*args: Any, config: ConfigParser | None = None, **kwargs: Any)[source]

Bases: object

Base Storage class

PATH_BACKEND

alias of Path

static canonicalize_package(name: str) str[source]
compare_files(file1: Path | str, file2: Path | str) bool[source]

Compare two files and determine whether they contain the same data. Return True if they match

copy_file(source: Path | str, dest: Path | str) None[source]

Copy a file from source to dest

delete(path: Path | str, dry_run: bool = False) int[source]

Delete the provided path.

delete_file(path: Path | str, dry_run: bool = False) int[source]

Delete the provided path, recursively if necessary.

property directory: str
exists(path: Path | str) bool[source]

Check whether the provided path exists

find(root: Path | str, dirs: bool = True) str[source]

A test helper simulating ‘find’.

Iterates over directories and filenames, given as relative paths to the root.

get_file_size(path: Path | str) int[source]

Get the size of a given path in bytes

get_hash(path: Path | str, function: str = 'sha256') str[source]

Get the hash digest of a given path (sha256 by default)

get_json_paths(name: str) Sequence[Path | str][source]
get_lock(path: str) BaseFileLock[source]

Retrieve the appropriate FileLock backend for this storage plugin

Parameters:

path (str) – The path to use for locking

Returns:

A FileLock backend for obtaining locks

Return type:

filelock.BaseFileLock
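Sketch: serialize writers with the backend's FileLock. The backend choice, the lock path, and the written file are assumptions.

    from bandersnatch.storage import storage_backend_plugins

    backend = next(iter(storage_backend_plugins("filesystem")))
    # filelock.BaseFileLock supports the context-manager protocol.
    with backend.get_lock("/srv/pypi/.lock"):
        backend.write_file("/srv/pypi/status", "ok")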

get_upload_time(path: Path | str) datetime[source]

Get the upload time of a given path

hash_file(path: Path | str, function: str = 'sha256') str[source]
initialize_plugin() None[source]

Code to initialize the plugin

is_dir(path: Path | str) bool[source]

Check whether the provided path is a directory.

is_file(path: Path | str) bool[source]

Check whether the provided path is a file.

iter_dir(path: Path | str) Generator[Path | str, None, None][source]

Iterate over the path, returning the sub-paths

mkdir(path: Path | str, exist_ok: bool = False, parents: bool = False) None[source]

Create the provided directory

move_file(source: Path | str, dest: Path | str) None[source]

Move a file from source to dest

name = 'storage'
open_file(path: Path | str, text: bool = True) Generator[IO, None, None][source]

Yield a file context to iterate over. If text is false, the file is opened in binary ('rb') mode.

read_file(path: Path | str, text: bool = True, encoding: str = 'utf-8', errors: str | None = None) str | bytes[source]

Read and return the contents of the file at the provided path. If text is false, the file is opened in binary ('rb') mode and bytes are returned.

rewrite(filepath: Path | str, mode: str = 'w', **kw: Any) Generator[IO, None, None][source]

Rewrite an existing file atomically, so that programs reading it in parallel never see a partially written file.

rmdir(path: Path | str, recurse: bool = False, force: bool = False, ignore_errors: bool = False, dry_run: bool = False) int[source]

Remove the directory. If recurse is True, allow removing empty children. If force is True, remove contents destructively.

scandir(path: Path | str) Generator[StorageDirEntry, None, None][source]

Read entries from the provided directory

set_upload_time(path: Path | str, time: datetime) None[source]

Set the upload time of a given path

symlink(source: Path | str, dest: Path | str) None[source]

Create a symlink at dest that points back at source

update_safe(filename: Path | str, **kw: Any) Generator[IO, None, None][source]

Rewrite a file atomically.

Clients are allowed to delete the tmpfile to signal that they don’t want to have it updated.

write_file(path: Path | str, contents: str | bytes) None[source]

Write data to the provided path. If contents is a string, the file is opened in text write ("w") mode with "utf-8" encoding; if bytes are supplied, it is opened in binary write ("wb") mode.

class bandersnatch.storage.StorageDirEntry(*args, **kwargs)[source]

Bases: Protocol

is_dir() bool[source]
is_file() bool[source]
property name: str | bytes
property path: str | bytes
class bandersnatch.storage.StoragePlugin(*args: Any, config: ConfigParser | None = None, **kwargs: Any)[source]

Bases: Storage

Plugin that provides a storage backend for bandersnatch

name = 'storage_plugin'
bandersnatch.storage.load_storage_plugins(entrypoint_group: str, enabled_plugin: str | None = None, config: ConfigParser | None = None, clear_cache: bool = False) set[Storage][source]

Load all storage plugins that are registered with pkg_resources

Parameters:
  • entrypoint_group (str) – The entrypoint group name to load plugins from

  • enabled_plugin (str) – The optional enabled storage plugin to search for

  • config (configparser.ConfigParser) – The optional configparser instance to pass in

  • clear_cache (bool) – Whether to clear the plugin cache

Returns:

A set of objects derived from the Storage class

Return type:

Set of Storage

bandersnatch.storage.storage_backend_plugins(backend: str | None = 'filesystem', config: ConfigParser | None = None, clear_cache: bool = False) Iterable[Storage][source]

Load and return the storage backend plugin objects

Parameters:
  • backend (str) – The optional enabled storage plugin to search for

  • config (configparser.ConfigParser) – The optional configparser instance to pass in

  • clear_cache (bool) – Whether to clear the plugin cache

Returns:

List of objects derived from the bandersnatch.storage.Storage class

Return type:

list of bandersnatch.storage.Storage
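Sketch: resolve the configured storage backend by name; "filesystem" is the default backend.

    from bandersnatch.storage import storage_backend_plugins

    backend = next(iter(storage_backend_plugins(backend="filesystem")))
    print(backend.name, backend.PATH_BACKEND)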

bandersnatch.utils module

class bandersnatch.utils.StrEnum(value)[source]

Bases: str, Enum

Enumeration class where members can be treated as strings.

value: str
bandersnatch.utils.bandersnatch_safe_name(name: str) str[source]

Convert an arbitrary string to a standard distribution name. Any runs of non-alphanumeric/. characters are replaced with a single '-'.

  • This was copied from pkg_resources (part of setuptools)

bandersnatch also lower-cases the returned name
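The expected outputs below follow from the rule above (the editor's reading, not captured program output).

    from bandersnatch.utils import bandersnatch_safe_name

    print(bandersnatch_safe_name("Django_REST-framework"))  # django-rest-framework
    print(bandersnatch_safe_name("zope.interface"))         # zope.interface (dots survive)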

bandersnatch.utils.convert_url_to_path(url: str) str[source]
bandersnatch.utils.find(root: Path | str, dirs: bool = True) str[source]

A test helper simulating ‘find’.

Iterates over directories and filenames, given as relative paths to the root.

bandersnatch.utils.find_all_files(files: set[Path], base_dir: Path) None[source]
bandersnatch.utils.hash(path: Path, function: str = 'sha256') str[source]
bandersnatch.utils.make_time_stamp() str[source]

Helper function that returns a timestamp suitable for use in a filename on any OS

bandersnatch.utils.parse_version(version: str) list[str][source]

Converts a version string to a list of strings used to check the first part of build tags. See PEP 425 (https://peps.python.org/pep-0425/#python-tag) for details.

Parameters:

version (str) – string in the form of ‘{major}.{minor}’ e.g. ‘3.6’

Returns:

list of first-element strings from build tag tuples, e.g. ['-cp36-', '-pp36-', '-ip36-', '-jy36-', '-py3.6-', '-py3.6.']; some Windows binaries have only the first part before the file extension

Return type:

List[str]
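Sketch: use the returned fragments to test whether a wheel filename targets Python 3.6; the filename is illustrative.

    from bandersnatch.utils import parse_version

    tags = parse_version("3.6")
    filename = "example-1.0-cp36-cp36m-manylinux1_x86_64.whl"
    print(any(tag in filename for tag in tags))  # True via the '-cp36-' fragment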

bandersnatch.utils.removeprefix(original: str, prefix: str) str[source]

Return a string with the given prefix string removed if present.

If the string starts with the prefix string, return string[len(prefix):]. Otherwise, return the original string.

Parameters:
  • original (str) – string to remove the prefix (e.g. ‘py3.6’)

  • prefix (str) – the prefix to remove (e.g. ‘py’)

Returns:

either the modified or the original string (e.g. ‘3.6’)

Return type:

str

bandersnatch.utils.rewrite(filepath: str | Path, mode: str = 'w', **kw: Any) Generator[IO, None, None][source]

Rewrite an existing file atomically, so that programs reading it in parallel never see a partially written file.
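Sketch of an atomic rewrite: readers never observe a half-written file. The path is hypothetical.

    from bandersnatch.utils import rewrite

    with rewrite("/srv/pypi/web/simple/index.html") as f:
        f.write("<html>...</html>")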

bandersnatch.utils.unlink_parent_dir(path: Path) None[source]

Remove a file and, if its parent directory is then empty, remove the directory as well

bandersnatch.utils.user_agent() str[source]

bandersnatch.verify module

async bandersnatch.verify.delete_unowned_files(mirror_base: Path, executor: ThreadPoolExecutor, all_package_files: list[Path], dry_run: bool) int[source]
async bandersnatch.verify.get_latest_json(master: Master, json_path: Path, executor: ThreadPoolExecutor | None = None, delete_removed_packages: bool = False) None[source]
async bandersnatch.verify.metadata_verify(config: ConfigParser, args: Namespace) int[source]

Crawl all saved JSON metadata (or fetch it online) to check that we have all packages; if deleting, generate a diff of unowned files

bandersnatch.verify.on_error(stop_on_error: bool, exception: BaseException, package: str) None[source]
async bandersnatch.verify.verify(master: Master, config: ConfigParser, json_file: str, mirror_base_path: Path, all_package_files: list[Path], args: Namespace, executor: ThreadPoolExecutor | None = None, releases_key: str = 'releases') None[source]
async bandersnatch.verify.verify_producer(master: Master, config: ConfigParser, all_package_files: list[Path], mirror_base_path: Path, json_files: list[str], args: Namespace, executor: ThreadPoolExecutor | None = None) None[source]