Core API#

class PreparedReceptorSpec(receptor_id, receptor_pdbqt, center, size)#

Bases: object

Fully prepared receptor definition ready for campaign construction.

Parameters:

receptor_id (str) – Unique receptor identifier used inside the docking campaign.
receptor_pdbqt (Path) – Path to the final receptor .pdbqt file.
center (Vec3) – Docking box center as (x, y, z).
size (Vec3) – Docking box size as (sx, sy, sz).

receptor_id: str#

receptor_pdbqt: Path#

center: Tuple[float, float, float]#

size: Tuple[float, float, float]#

class ProDockResult( project_dir, ligand_dir, campaign_json, receptors, receptor_pdb_by_id, campaign, docking_results, pose_df, interaction_result, merged_df, interaction_df, summary_df, compact_interactions, db_path, )#

Bases: object

Structured result bundle returned by ProDockPipeline.

Parameters:

project_dir (Path) – Root working directory of the project.
ligand_dir (Path) – Directory containing final ligand files used by the campaign.
campaign_json (Path) – Path to the generated campaign JSON file.
receptors (List[PreparedReceptorSpec]) – Prepared receptor specifications included in the campaign.
receptor_pdb_by_id (Dict[str, Path]) – Mapping from receptor identifier to receptor .pdb file used for interaction analysis.
campaign (Campaign) – In-memory campaign object.
docking_results (Any) – Raw docking results returned by prodock.dock.BatchDock.
pose_df (pandas.DataFrame) – Pose table collected by prodock.postprocess.pose.PoseCrawler.
interaction_result (Any) – Raw interaction extraction result object, or None if interaction extraction was skipped.
merged_df (pandas.DataFrame) – Final dataframe chosen for downstream insertion into the database. This is the interaction-merged dataframe if interaction extraction is enabled; otherwise it is the crawled pose dataframe.
interaction_df (Optional[pandas.DataFrame]) – Long-form interaction-event dataframe, or None.
summary_df (Optional[pandas.DataFrame]) – Pose-level interaction summary dataframe, or None.
compact_interactions (Optional[Dict[str, Any]]) – Compact per-pose interaction dictionary, or None.
db_path (Optional[Path]) – SQLite database path if database writing was enabled, or None otherwise.

project_dir: Path#

ligand_dir: Path#

campaign_json: Path#

receptors: List[PreparedReceptorSpec]#

receptor_pdb_by_id: Dict[str, Path]#

campaign: Campaign#

docking_results: Any#

pose_df: pandas.DataFrame#

interaction_result: Any#

merged_df: pandas.DataFrame#

interaction_df: pandas.DataFrame | None#

summary_df: pandas.DataFrame | None#

compact_interactions: Dict[str, Any] | None#

db_path: Path | None#

class ProDockPipeline( project_dir, *, engines=None, cpu=4, seed=42, exhaustiveness=8, n_poses=10, n_jobs=None, progress=True, receptor_use_meeko=False, ligand_output_format='pdbqt', ligand_backend='meeko', box_scale=2.0, box_isotropic=True, campaign_name='campaign.json', log_file='prodock.log', log_level='INFO', log_colored=True, log_json=False, )#

Bases: object

High-level orchestration helper for ProDock projects.

This class provides a single automation entry point for common workflows:

raw receptor records + ligand SMILES records
prebuilt receptor .pdbqt files + explicit docking box coordinates
optional pose crawling after docking
optional interaction extraction
optional database creation and insertion

All generated data are organized under project_dir.

Parameters:

project_dir (PathLike) – Root directory used for all generated project files.
engines (Optional[Sequence[str]]) – Docking engines to include in the campaign. Default is ["smina", "qvina"].
cpu (int) – Per-engine CPU setting stored in the campaign.
seed (int) – Random seed stored in the campaign.
exhaustiveness (int) – Exhaustiveness stored in the campaign.
n_poses (int) – Number of poses stored in the campaign.
n_jobs (Optional[int]) – Number of parallel jobs used by BatchDock. If None, this defaults to cpu.
progress (bool) – Whether to enable progress reporting in BatchDock.
receptor_use_meeko (bool) – Whether receptor preparation should use Meeko.
ligand_output_format (str) – Final ligand output format.
ligand_backend (str) – Ligand conversion backend used by LigandPrep.
box_scale (float) – Scale factor used when computing a grid box from a reference ligand.
box_isotropic (bool) – Whether ligand-derived boxes should be isotropic.
campaign_name (str) – Default campaign JSON file name.
log_file (str)
log_level (Union[str, int])
log_colored (bool)
log_json (bool)

Example#

pipeline = ProDockPipeline("Data/testcase/Multi")
result = pipeline.run(
    receptors=RECEPTORS,
    ligands=LIGANDS,
    extract_interaction=True,
)
print(result.campaign_json)
print(result.db_path)

Example#

pipeline = ProDockPipeline("Data/testcase/Multi")
result = pipeline.run(
    prepared_receptors=[
        {
            "receptor_id": "4WKQ",
            "receptor_pdbqt": "Data/testcase/Multi/4WKQ/filtered_protein/4WKQ.pdbqt",
            "center": (2.865, 193.257, 21.367),
            "size": (27.091, 27.091, 27.091),
        }
    ],
    ligand_dir="Data/testcase/Multi/ligands",
    extract_interaction=True,
)
print(result.receptor_pdb_by_id)

prepare_receptors(*, receptors=None, prepared_receptors=None)#

Prepare receptor inputs for campaign construction.

Exactly one receptor input mode must be supplied:

receptors for raw PDB-queryable receptors
prepared_receptors for already prepared receptor .pdbqt files

Raw mode will:

call PDBQuery.process_batch()
convert each receptor .pdb file to .pdbqt
derive box coordinates from explicit box values or a reference ligand

Prepared mode will only validate and normalize the provided paths and box definitions.

Parameters:

receptors (Optional[List[Dict[str, Any]]]) – Raw receptor records, typically compatible with PDBQuery.process_batch().
prepared_receptors (Optional[List[Dict[str, Any]]]) – List of prebuilt receptor records. Each record must contain at least receptor_pdbqt (or receptor), center, and size.

Returns:

Normalized prepared receptor specifications.

Return type:

List[PreparedReceptorSpec]

Raises:

ValueError – Raised if both receptor modes are provided or if neither is provided.
FileNotFoundError – Raised if an expected receptor or reference-ligand file is missing.

Example#

receptor_specs = pipeline.prepare_receptors(
    receptors=RECEPTORS
)

Example#

receptor_specs = pipeline.prepare_receptors(
    prepared_receptors=[
        {
            "receptor_id": "4WKQ",
            "receptor_pdbqt": "Data/testcase/Multi/4WKQ/filtered_protein/4WKQ.pdbqt",
            "center": (2.865, 193.257, 21.367),
            "size": (27.091, 27.091, 27.091),
        }
    ]
)

prepare_ligands(*, ligands=None, ligand_dir=None)#

Prepare or resolve ligand inputs.

Exactly one ligand input mode must be supplied:

ligands as a list of SMILES records
ligand_dir as an existing directory of prepared ligands

When ligands is used, the output directory defaults to <project_dir>/ligands.

Parameters:

ligands (Optional[List[Dict[str, str]]]) – Ligand records, typically containing id and smiles.
ligand_dir (Optional[PathLike]) – Existing directory containing prepared ligand files.

Returns:

Absolute path to the ligand directory used by the campaign.

Return type:

Path

Raises:

ValueError – Raised if both ligand modes are provided or if neither is provided.
FileNotFoundError – Raised if the provided ligand directory does not exist.
NotADirectoryError – Raised if the provided ligand path is not a directory.

Example#

ligand_dir = pipeline.prepare_ligands(
    ligands=LIGANDS
)

Example#

ligand_dir = pipeline.prepare_ligands(
    ligand_dir="Data/testcase/Multi/ligands"
)

build_campaign(*, receptor_specs, ligand_dir)#

Build a Campaign from prepared receptors and ligands.

Parameters:

receptor_specs (Sequence[PreparedReceptorSpec]) – Prepared receptor specifications.
ligand_dir (PathLike) – Directory containing final ligand files.

Returns:

Campaign object ready to be serialized or executed.

Return type:

Campaign

Example#

campaign = pipeline.build_campaign(
    receptor_specs=receptor_specs,
    ligand_dir=ligand_dir,
)
print(campaign)

save_campaign(campaign, *, campaign_name=None)#

Save a campaign JSON file under the project directory.

Parameters:

campaign (Campaign) – Campaign instance to serialize.
campaign_name (Optional[str]) – Output JSON file name. If None, the pipeline default is used.

Returns:

Path to the written JSON file.

Return type:

Path

Example#

campaign_json = pipeline.save_campaign(campaign)
print(campaign_json)

crawl_poses(*, backend='auto')#

Crawl docked poses from the project directory.

This wraps prodock.postprocess.pose.PoseCrawler using the pipeline project directory as the crawl root.

Parameters:

backend (str) – Molecule loading backend passed to PoseCrawler.crawl_mols().

Returns:

Crawled pose dataframe.

Return type:

pandas.DataFrame

Example#

pose_df = pipeline.crawl_poses(backend="obabel")
print(pose_df.head())

extract_interactions( *, poses, receptor_specs, batch_size=1, progress=False, n_jobs=1, include_fingerprint_columns=True, include_interaction_events=True, include_bitvectors=False, include_countvectors=False, fail_fast=True, use_profiler=False, )#

Extract protein-ligand interactions from a crawled pose dataframe.

The receptor .pdb mapping is automatically derived from the prepared receptor .pdbqt files by replacing .pdbqt with .pdb.

By default this method uses extract_pose_table_interactions(). Optionally, it can use InteractionProfiler.

Parameters:

poses (pandas.DataFrame) – Pose dataframe returned by crawl_poses().
receptor_specs (Sequence[PreparedReceptorSpec]) – Prepared receptor specifications.
batch_size (int) – Batch size for interaction extraction.
progress (bool) – Whether to show progress.
n_jobs (int) – Number of parallel jobs.
include_fingerprint_columns (bool) – Whether to include fingerprint columns.
include_interaction_events (bool) – Whether to include the long-form interaction-event dataframe.
include_bitvectors (bool) – Whether to include bitvectors.
include_countvectors (bool) – Whether to include countvectors.
fail_fast (bool) – Whether to fail on the first extraction error.
use_profiler (bool) – Whether to use InteractionProfiler instead of the functional wrapper.

Returns:

Tuple (interaction_result, receptor_pdb_by_id).

Return type:

Tuple[Any, Dict[str, Path]]

Example#

interaction_result, receptor_pdb_by_id = pipeline.extract_interactions(
    poses=pose_df,
    receptor_specs=receptor_specs,
    batch_size=1,
    progress=False,
    n_jobs=1,
)

merged_df = interaction_result.merged_df
interaction_df = interaction_result.interaction_df
summary_df = interaction_result.summary_df
compact = interaction_result.summary_dict(kind="compact")

save_database( *, df, interactions_by_pose=None, db_name='prodock.db', replace=True, replace_interactions=True, )#

Create or update a project-local SQLite database and insert results.

Relative database names are created inside project_dir. With the default configuration, the database path is:

<project_dir>/prodock.db

Parameters:

df (pandas.DataFrame) – Dataframe to insert.
interactions_by_pose (Optional[Dict[str, Any]]) – Optional compact interaction dictionary keyed by pose identifier.
db_name (PathLike) – Database filename or relative path under project_dir.
replace (bool) – Whether to replace existing pose rows.
replace_interactions (bool) – Whether to replace existing interaction rows.

Returns:

Absolute path to the SQLite database.

Return type:

Path

Example#

db_path = pipeline.save_database(
    df=merged_df,
    interactions_by_pose=compact,
    db_name="prodock.db",
)
print(db_path)

run( *, receptors=None, prepared_receptors=None, ligands=None, ligand_dir=None, campaign_name=None, crawl_backend='auto', extract_interaction=False, interaction_batch_size=1, interaction_progress=False, interaction_n_jobs=1, include_fingerprint_columns=True, include_interaction_events=True, include_bitvectors=False, include_countvectors=False, fail_fast=True, use_interaction_profiler=False, save_to_database=True, db_name='prodock.db', replace=True, replace_interactions=True, )#

Execute the full ProDock pipeline.

This method performs the full end-to-end automation:

prepare or validate receptors
prepare or resolve ligands
build and save the campaign JSON
run batch docking
run pose crawling on the project directory
optionally extract interactions
optionally create <project_dir>/prodock.db and insert results

Parameters:

receptors (Optional[List[Dict[str, Any]]]) – Raw receptor records for full receptor acquisition and preparation mode.
prepared_receptors (Optional[List[Dict[str, Any]]]) – Already prepared receptor records for direct docking mode.
ligands (Optional[List[Dict[str, str]]]) – Ligand SMILES records for ligand preparation mode.
ligand_dir (Optional[PathLike]) – Existing prepared ligand directory for direct docking mode.
campaign_name (Optional[str]) – Output campaign JSON file name.
crawl_backend (str) – Backend passed to crawl_poses().
extract_interaction (bool) – Whether to run interaction extraction after pose crawling.
interaction_batch_size (int) – Interaction extraction batch size.
interaction_progress (bool) – Whether to show progress during interaction extraction.
interaction_n_jobs (int) – Parallel jobs for interaction extraction.
include_fingerprint_columns (bool) – Whether to include fingerprint columns in interaction output.
include_interaction_events (bool) – Whether to include long-form interaction events.
include_bitvectors (bool) – Whether to include bitvectors.
include_countvectors (bool) – Whether to include countvectors.
fail_fast (bool) – Whether interaction extraction should fail immediately on errors.
use_interaction_profiler (bool) – Whether to use InteractionProfiler for interaction extraction.
save_to_database (bool) – Whether to create or update the SQLite database and insert results.
db_name (PathLike) – Database filename or relative path under project_dir.
replace (bool) – Whether database insertion should replace existing pose rows.
replace_interactions (bool) – Whether database insertion should replace existing interaction rows.

Returns:

Structured pipeline result.

Return type:

ProDockResult

Example#

result = pipeline.run(
    receptors=RECEPTORS,
    ligands=LIGANDS,
    extract_interaction=True,
    db_name="prodock.db",
)

print(result.campaign_json)
print(result.db_path)
print(result.pose_df.head())
print(result.merged_df.head())

Example#

result = pipeline.run(
    prepared_receptors=[
        {
            "receptor_id": "4WKQ",
            "receptor_pdbqt": "Data/testcase/Multi/4WKQ/filtered_protein/4WKQ.pdbqt",
            "center": (2.865, 193.257, 21.367),
            "size": (27.091, 27.091, 27.091),
        }
    ],
    ligand_dir="Data/testcase/Multi/ligands",
    extract_interaction=True,
    interaction_batch_size=1,
    interaction_n_jobs=1,
)

prodock( project_dir, *, receptors=None, prepared_receptors=None, ligands=None, ligand_dir=None, engines=None, cpu=4, seed=42, exhaustiveness=8, n_poses=10, n_jobs=None, progress=True, receptor_use_meeko=False, ligand_output_format='pdbqt', ligand_backend='meeko', box_scale=2.0, box_isotropic=True, campaign_name='campaign.json', crawl_backend='auto', extract_interaction=False, interaction_batch_size=1, interaction_progress=False, interaction_n_jobs=1, include_fingerprint_columns=True, include_interaction_events=True, include_bitvectors=False, include_countvectors=False, fail_fast=True, use_interaction_profiler=False, save_to_database=True, db_name='prodock.db', replace=True, replace_interactions=True, log_file='prodock.log', log_level='INFO', log_colored=True, log_json=False, )#

Functional wrapper around ProDockPipeline.

This is the main convenience entry point for running a complete ProDock workflow in one call.

Parameters:

project_dir (PathLike) – Root project directory.
receptors (Optional[List[Dict[str, Any]]]) – Raw receptor records.
prepared_receptors (Optional[List[Dict[str, Any]]]) – Already prepared receptor records.
ligands (Optional[List[Dict[str, str]]]) – Ligand SMILES records.
ligand_dir (Optional[PathLike]) – Existing ligand directory.
engines (Optional[Sequence[str]]) – Docking engines to include. Default is ["smina", "qvina"].
cpu (int) – CPU value stored in the campaign.
seed (int) – Random seed stored in the campaign.
exhaustiveness (int) – Exhaustiveness stored in the campaign.
n_poses (int) – Number of poses stored in the campaign.
n_jobs (Optional[int]) – Parallel jobs used by BatchDock.
progress (bool) – Whether to enable docking progress reporting.
receptor_use_meeko (bool) – Whether receptor preparation uses Meeko.
ligand_output_format (str) – Final ligand output format.
ligand_backend (str) – Ligand conversion backend.
box_scale (float) – Box scale factor used for ligand-derived boxes.
box_isotropic (bool) – Whether ligand-derived boxes are isotropic.
campaign_name (str) – Output campaign JSON file name.
crawl_backend (str) – Pose crawler backend.
extract_interaction (bool) – Whether to run interaction extraction.
interaction_batch_size (int) – Interaction extraction batch size.
interaction_progress (bool) – Whether to show interaction extraction progress.
interaction_n_jobs (int) – Parallel jobs for interaction extraction.
include_fingerprint_columns (bool) – Whether to include fingerprint columns.
include_interaction_events (bool) – Whether to include long-form interaction events.
include_bitvectors (bool) – Whether to include bitvectors.
include_countvectors (bool) – Whether to include countvectors.
fail_fast (bool) – Whether interaction extraction fails immediately on errors.
use_interaction_profiler (bool) – Whether to use InteractionProfiler.
save_to_database (bool) – Whether to write results into the SQLite database.
db_name (PathLike) – Database filename or relative path under project_dir.
replace (bool) – Whether to replace existing pose rows in the database.
replace_interactions (bool) – Whether to replace existing interaction rows in the database.
log_file (str)
log_level (str | int)
log_colored (bool)
log_json (bool)

Returns:

Structured pipeline result.

Return type:

ProDockResult

Example#

from prodock import prodock

PROJECT = "Data/testcase/Multi"

RECEPTORS = [
    {
        "pdb_id": "4WKQ",
        "receptor_name": "EGFR_4WKQ",
        "ligand_code": "IRE",
        "chains": ["A"],
        "cofactors": [],
    },
]

LIGANDS = [
    {
        "id": "erlotinib",
        "smiles": "COCCOc1cc2c(ncnc2cc1OCCOC)Nc1cccc(c1)C#C",
    },
    {
        "id": "gefitinib",
        "smiles": "COc1cc2ncnc(c2cc1OCCCN1CCOCC1)Nc1ccc(c(c1)Cl)F",
    },
]

result = prodock(
    PROJECT,
    receptors=RECEPTORS,
    ligands=LIGANDS,
    engines=["smina"],
    extract_interaction=True,
    db_name="prodock.db",
)

print(result.campaign_json)
print(result.db_path)
print(result.receptor_pdb_by_id)

Example#

from prodock import prodock

PROJECT = "Data/testcase/Multi"

result = prodock(
    PROJECT,
    prepared_receptors=[
        {
            "receptor_id": "4WKQ",
            "receptor_pdbqt": "Data/testcase/Multi/4WKQ/filtered_protein/4WKQ.pdbqt",
            "center": (2.865, 193.257, 21.367),
            "size": (27.091, 27.091, 27.091),
        }
    ],
    ligand_dir="Data/testcase/Multi/ligands",
    extract_interaction=True,
    db_name="prodock.db",
)

print(result.summary_df.head())

run_prodock( project_dir, *, receptors=None, prepared_receptors=None, ligands=None, ligand_dir=None, engines=None, cpu=4, seed=42, exhaustiveness=8, n_poses=10, n_jobs=None, progress=True, receptor_use_meeko=False, ligand_output_format='pdbqt', ligand_backend='meeko', box_scale=2.0, box_isotropic=True, campaign_name='campaign.json', crawl_backend='auto', extract_interaction=False, interaction_batch_size=1, interaction_progress=False, interaction_n_jobs=1, include_fingerprint_columns=True, include_interaction_events=True, include_bitvectors=False, include_countvectors=False, fail_fast=True, use_interaction_profiler=False, save_to_database=True, db_name='prodock.db', replace=True, replace_interactions=True, log_file='prodock.log', log_level='INFO', log_colored=True, log_json=False, )#

Functional wrapper around ProDockPipeline.

This is the main convenience entry point for running a complete ProDock workflow in one call.

Parameters:

project_dir (PathLike) – Root project directory.
receptors (Optional[List[Dict[str, Any]]]) – Raw receptor records.
prepared_receptors (Optional[List[Dict[str, Any]]]) – Already prepared receptor records.
ligands (Optional[List[Dict[str, str]]]) – Ligand SMILES records.
ligand_dir (Optional[PathLike]) – Existing ligand directory.
engines (Optional[Sequence[str]]) – Docking engines to include. Default is ["smina", "qvina"].
cpu (int) – CPU value stored in the campaign.
seed (int) – Random seed stored in the campaign.
exhaustiveness (int) – Exhaustiveness stored in the campaign.
n_poses (int) – Number of poses stored in the campaign.
n_jobs (Optional[int]) – Parallel jobs used by BatchDock.
progress (bool) – Whether to enable docking progress reporting.
receptor_use_meeko (bool) – Whether receptor preparation uses Meeko.
ligand_output_format (str) – Final ligand output format.
ligand_backend (str) – Ligand conversion backend.
box_scale (float) – Box scale factor used for ligand-derived boxes.
box_isotropic (bool) – Whether ligand-derived boxes are isotropic.
campaign_name (str) – Output campaign JSON file name.
crawl_backend (str) – Pose crawler backend.
extract_interaction (bool) – Whether to run interaction extraction.
interaction_batch_size (int) – Interaction extraction batch size.
interaction_progress (bool) – Whether to show interaction extraction progress.
interaction_n_jobs (int) – Parallel jobs for interaction extraction.
include_fingerprint_columns (bool) – Whether to include fingerprint columns.
include_interaction_events (bool) – Whether to include long-form interaction events.
include_bitvectors (bool) – Whether to include bitvectors.
include_countvectors (bool) – Whether to include countvectors.
fail_fast (bool) – Whether interaction extraction fails immediately on errors.
use_interaction_profiler (bool) – Whether to use InteractionProfiler.
save_to_database (bool) – Whether to write results into the SQLite database.
db_name (PathLike) – Database filename or relative path under project_dir.
replace (bool) – Whether to replace existing pose rows in the database.
replace_interactions (bool) – Whether to replace existing interaction rows in the database.
log_file (str)
log_level (str | int)
log_colored (bool)
log_json (bool)

Returns:

Structured pipeline result.

Return type:

ProDockResult

Example#

from prodock import prodock

PROJECT = "Data/testcase/Multi"

RECEPTORS = [
    {
        "pdb_id": "4WKQ",
        "receptor_name": "EGFR_4WKQ",
        "ligand_code": "IRE",
        "chains": ["A"],
        "cofactors": [],
    },
]

LIGANDS = [
    {
        "id": "erlotinib",
        "smiles": "COCCOc1cc2c(ncnc2cc1OCCOC)Nc1cccc(c1)C#C",
    },
    {
        "id": "gefitinib",
        "smiles": "COc1cc2ncnc(c2cc1OCCCN1CCOCC1)Nc1ccc(c(c1)Cl)F",
    },
]

result = prodock(
    PROJECT,
    receptors=RECEPTORS,
    ligands=LIGANDS,
    engines=["smina"],
    extract_interaction=True,
    db_name="prodock.db",
)

print(result.campaign_json)
print(result.db_path)
print(result.receptor_pdb_by_id)

Example#

from prodock import prodock

PROJECT = "Data/testcase/Multi"

result = prodock(
    PROJECT,
    prepared_receptors=[
        {
            "receptor_id": "4WKQ",
            "receptor_pdbqt": "Data/testcase/Multi/4WKQ/filtered_protein/4WKQ.pdbqt",
            "center": (2.865, 193.257, 21.367),
            "size": (27.091, 27.091, 27.091),
        }
    ],
    ligand_dir="Data/testcase/Multi/ligands",
    extract_interaction=True,
    db_name="prodock.db",
)

print(result.summary_df.head())