zsasa
zsasa: Fast SASA calculation using Zig.
This package provides Python bindings for the zsasa library, a high-performance implementation of Solvent Accessible Surface Area (SASA) calculation algorithms.
Example:
import numpy as np from zsasa import calculate_sasa
Single atom
coords = np.array([[0.0, 0.0, 0.0]]) radii = np.array([1.5]) result = calculate_sasa(coords, radii) print(f"Total SASA: {result.total_area:.2f} Ų")
>>> # Classify atoms >>> from zsasa import classify_atoms, get_radius >>> result = classify_atoms(["ALA", "ALA"], ["CA", "O"]) >>> print(result.radii) # [1.87, 1.4]Integrations: For structure file support, use the gemmi integration:
>>> # pip install zsasa[gemmi] >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure >>> result = calculate_sasa_from_structure("protein.cif") >>> print(f"Total: {result.total_area:.1f} Ų")Analysis: For per-residue aggregation and RSA calculation:
>>> from zsasa import aggregate_from_result >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure >>> result = calculate_sasa_from_structure("protein.cif") >>> residues = aggregate_from_result(result) >>> for res in residues: ... if res.rsa is not None: ... print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: RSA={res.rsa:.1%}")MDTraj Integration: For MD trajectory analysis (requires mdtraj):
>>> # pip install mdtraj >>> from zsasa.mdtraj import compute_sasa >>> import mdtraj as md >>> traj = md.load('trajectory.xtc', top='topology.pdb') >>> sasa = compute_sasa(traj) # Returns (n_frames, n_atoms) in nm²MDAnalysis Integration: For MD trajectory analysis with MDAnalysis (requires MDAnalysis):
>>> # pip install MDAnalysis >>> import MDAnalysis as mda >>> from zsasa.mdanalysis import SASAAnalysis >>> u = mda.Universe('topology.pdb', 'trajectory.xtc') >>> sasa = SASAAnalysis(u, select='protein') >>> sasa.run() >>> print(sasa.results.total_area) # Returns per-frame SASA in ŲNative XTC Reader: For standalone XTC reading without MDTraj/MDAnalysis dependencies:
>>> from zsasa.xtc import XtcReader, compute_sasa_trajectory >>> # Low-level reader API >>> with XtcReader("trajectory.xtc") as reader: ... for frame in reader: ... print(f"Step {frame.step}") >>> >>> # High-level SASA calculation (radii from topology) >>> result = compute_sasa_trajectory("trajectory.xtc", radii) >>> print(result.total_areas) # Per-frame SASA in ŲNative DCD Reader: For standalone DCD reading (NAMD/CHARMM format):
>>> from zsasa.dcd import DcdReader, compute_sasa_trajectory >>> with DcdReader("trajectory.dcd") as reader: ... for frame in reader: ... print(f"Step {frame.step}, {frame.natoms} atoms") >>> >>> result = compute_sasa_trajectory("trajectory.dcd", radii) >>> print(result.total_areas) # Per-frame SASA in Ų
1"""zsasa: Fast SASA calculation using Zig. 2 3This package provides Python bindings for the zsasa library, 4a high-performance implementation of Solvent Accessible Surface Area (SASA) 5calculation algorithms. 6 7Example: 8 >>> import numpy as np 9 >>> from zsasa import calculate_sasa 10 >>> 11 >>> # Single atom 12 >>> coords = np.array([[0.0, 0.0, 0.0]]) 13 >>> radii = np.array([1.5]) 14 >>> result = calculate_sasa(coords, radii) 15 >>> print(f"Total SASA: {result.total_area:.2f} Ų") 16 17 >>> # Classify atoms 18 >>> from zsasa import classify_atoms, get_radius 19 >>> result = classify_atoms(["ALA", "ALA"], ["CA", "O"]) 20 >>> print(result.radii) # [1.87, 1.4] 21 22Integrations: 23 For structure file support, use the gemmi integration: 24 25 >>> # pip install zsasa[gemmi] 26 >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure 27 >>> result = calculate_sasa_from_structure("protein.cif") 28 >>> print(f"Total: {result.total_area:.1f} Ų") 29 30Analysis: 31 For per-residue aggregation and RSA calculation: 32 33 >>> from zsasa import aggregate_from_result 34 >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure 35 >>> result = calculate_sasa_from_structure("protein.cif") 36 >>> residues = aggregate_from_result(result) 37 >>> for res in residues: 38 ... if res.rsa is not None: 39 ... print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: RSA={res.rsa:.1%}") 40 41MDTraj Integration: 42 For MD trajectory analysis (requires mdtraj): 43 44 >>> # pip install mdtraj 45 >>> from zsasa.mdtraj import compute_sasa 46 >>> import mdtraj as md 47 >>> traj = md.load('trajectory.xtc', top='topology.pdb') 48 >>> sasa = compute_sasa(traj) # Returns (n_frames, n_atoms) in nm² 49 50MDAnalysis Integration: 51 For MD trajectory analysis with MDAnalysis (requires MDAnalysis): 52 53 >>> # pip install MDAnalysis 54 >>> import MDAnalysis as mda 55 >>> from zsasa.mdanalysis import SASAAnalysis 56 >>> u = mda.Universe('topology.pdb', 'trajectory.xtc') 57 >>> sasa = SASAAnalysis(u, select='protein') 58 >>> sasa.run() 59 >>> print(sasa.results.total_area) # Returns per-frame SASA in Ų 60 61Native XTC Reader: 62 For standalone XTC reading without MDTraj/MDAnalysis dependencies: 63 64 >>> from zsasa.xtc import XtcReader, compute_sasa_trajectory 65 >>> # Low-level reader API 66 >>> with XtcReader("trajectory.xtc") as reader: 67 ... for frame in reader: 68 ... print(f"Step {frame.step}") 69 >>> 70 >>> # High-level SASA calculation (radii from topology) 71 >>> result = compute_sasa_trajectory("trajectory.xtc", radii) 72 >>> print(result.total_areas) # Per-frame SASA in Ų 73 74Native DCD Reader: 75 For standalone DCD reading (NAMD/CHARMM format): 76 77 >>> from zsasa.dcd import DcdReader, compute_sasa_trajectory 78 >>> with DcdReader("trajectory.dcd") as reader: 79 ... for frame in reader: 80 ... print(f"Step {frame.step}, {frame.natoms} atoms") 81 >>> 82 >>> result = compute_sasa_trajectory("trajectory.dcd", radii) 83 >>> print(result.total_areas) # Per-frame SASA in Ų 84""" 85 86from zsasa._ffi import get_version 87from zsasa.analysis import ( 88 ResidueResult, 89 aggregate_by_residue, 90 aggregate_from_result, 91) 92from zsasa.batch import BatchDirResult, process_directory 93from zsasa.classifier import ( 94 AtomClass, 95 ClassificationResult, 96 ClassifierType, 97 classify_atoms, 98 get_atom_class, 99 get_radius, 100 guess_radius, 101 guess_radius_from_atom_name, 102) 103from zsasa.rsa import ( 104 MAX_SASA, 105 calculate_rsa, 106 calculate_rsa_batch, 107 get_max_sasa, 108) 109from zsasa.sasa import ( 110 BatchSasaResult, 111 SasaResult, 112 calculate_sasa, 113 calculate_sasa_batch, 114) 115 116__all__ = [ 117 # SASA calculation 118 "calculate_sasa", 119 "calculate_sasa_batch", 120 "SasaResult", 121 "BatchSasaResult", 122 # Batch directory processing 123 "process_directory", 124 "BatchDirResult", 125 # Classifier 126 "ClassifierType", 127 "AtomClass", 128 "ClassificationResult", 129 "get_radius", 130 "get_atom_class", 131 "guess_radius", 132 "guess_radius_from_atom_name", 133 "classify_atoms", 134 # RSA 135 "MAX_SASA", 136 "get_max_sasa", 137 "calculate_rsa", 138 "calculate_rsa_batch", 139 # Analysis 140 "ResidueResult", 141 "aggregate_by_residue", 142 "aggregate_from_result", 143 # Utility 144 "get_version", 145] 146 147__version__ = get_version()
38def calculate_sasa( 39 coords: NDArray[np.float64], 40 radii: NDArray[np.float64], 41 *, 42 algorithm: Literal["sr", "lr"] = "sr", 43 n_points: int = 100, 44 n_slices: int = 20, 45 probe_radius: float = 1.4, 46 n_threads: int = 0, 47 use_bitmask: bool = False, 48) -> SasaResult: 49 """Calculate Solvent Accessible Surface Area (SASA). 50 51 Args: 52 coords: Atom coordinates as (N, 3) array. 53 radii: Atom radii as (N,) array. 54 algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). 55 n_points: Number of test points per atom (for SR algorithm). Default: 100. 56 n_slices: Number of slices per atom (for LR algorithm). Default: 20. 57 probe_radius: Water probe radius in Angstroms. Default: 1.4. 58 n_threads: Number of threads to use. 0 = auto-detect. Default: 0. 59 use_bitmask: Use bitmask LUT optimization for SR algorithm. 60 Supports n_points 1..1024. Default: False. 61 62 Returns: 63 SasaResult containing total_area and per-atom atom_areas. 64 65 Raises: 66 ValueError: If input arrays have invalid shapes or calculation fails. 67 68 Example: 69 >>> import numpy as np 70 >>> from zsasa import calculate_sasa 71 >>> 72 >>> # Two atoms 73 >>> coords = np.array([[0.0, 0.0, 0.0], [3.0, 0.0, 0.0]]) 74 >>> radii = np.array([1.5, 1.5]) 75 >>> result = calculate_sasa(coords, radii) 76 >>> print(f"Total: {result.total_area:.2f}") 77 """ 78 ffi, lib = _get_lib() 79 80 # Validate parameters 81 if n_points <= 0: 82 msg = f"n_points must be positive, got {n_points}" 83 raise ValueError(msg) 84 if n_slices <= 0: 85 msg = f"n_slices must be positive, got {n_slices}" 86 raise ValueError(msg) 87 if probe_radius <= 0: 88 msg = f"probe_radius must be positive, got {probe_radius}" 89 raise ValueError(msg) 90 if n_threads < 0: 91 msg = f"n_threads must be non-negative, got {n_threads}" 92 raise ValueError(msg) 93 94 # Validate bitmask constraints 95 if use_bitmask: 96 use_bitmask = _validate_bitmask_params(algorithm, n_points) 97 98 # Validate and convert inputs 99 coords = np.ascontiguousarray(coords, dtype=np.float64) 100 radii = np.ascontiguousarray(radii, dtype=np.float64) 101 102 if coords.ndim != 2 or coords.shape[1] != 3: 103 msg = f"coords must be (N, 3) array, got shape {coords.shape}" 104 raise ValueError(msg) 105 106 n_atoms = coords.shape[0] 107 if radii.shape != (n_atoms,): 108 msg = f"radii must be ({n_atoms},) array, got shape {radii.shape}" 109 raise ValueError(msg) 110 111 if np.any(radii < 0): 112 msg = "All radii must be non-negative" 113 raise ValueError(msg) 114 115 # Extract x, y, z as contiguous arrays 116 x = np.ascontiguousarray(coords[:, 0]) 117 y = np.ascontiguousarray(coords[:, 1]) 118 z = np.ascontiguousarray(coords[:, 2]) 119 120 # Allocate output arrays 121 atom_areas = np.zeros(n_atoms, dtype=np.float64) 122 total_area = ffi.new("double*") 123 124 # Get cffi pointers from numpy arrays 125 x_ptr = ffi.cast("double*", x.ctypes.data) 126 y_ptr = ffi.cast("double*", y.ctypes.data) 127 z_ptr = ffi.cast("double*", z.ctypes.data) 128 radii_ptr = ffi.cast("double*", radii.ctypes.data) 129 areas_ptr = ffi.cast("double*", atom_areas.ctypes.data) 130 131 # Call the appropriate function 132 if use_bitmask: 133 result = lib.zsasa_calc_sr_bitmask( 134 x_ptr, 135 y_ptr, 136 z_ptr, 137 radii_ptr, 138 n_atoms, 139 n_points, 140 probe_radius, 141 n_threads, 142 areas_ptr, 143 total_area, 144 ) 145 elif algorithm == "sr": 146 result = lib.zsasa_calc_sr( 147 x_ptr, 148 y_ptr, 149 z_ptr, 150 radii_ptr, 151 n_atoms, 152 n_points, 153 probe_radius, 154 n_threads, 155 areas_ptr, 156 total_area, 157 ) 158 elif algorithm == "lr": 159 result = lib.zsasa_calc_lr( 160 x_ptr, 161 y_ptr, 162 z_ptr, 163 radii_ptr, 164 n_atoms, 165 n_slices, 166 probe_radius, 167 n_threads, 168 areas_ptr, 169 total_area, 170 ) 171 else: 172 msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'." 173 raise ValueError(msg) 174 175 # Check for errors 176 if result == ZSASA_ERROR_INVALID_INPUT: 177 msg = "Invalid input to SASA calculation" 178 raise ValueError(msg) 179 elif result == ZSASA_ERROR_OUT_OF_MEMORY: 180 msg = "Out of memory during SASA calculation" 181 raise MemoryError(msg) 182 elif result == ZSASA_ERROR_CALCULATION: 183 msg = "Error during SASA calculation" 184 raise RuntimeError(msg) 185 elif result == ZSASA_ERROR_UNSUPPORTED_N_POINTS: 186 msg = ( 187 f"Unsupported n_points for bitmask: {n_points}. " 188 f"Must be {_BITMASK_MIN_N_POINTS}..{_BITMASK_MAX_N_POINTS}" 189 ) 190 raise ValueError(msg) 191 elif result != ZSASA_OK: 192 msg = f"Unknown error code: {result}" 193 raise RuntimeError(msg) 194 195 return SasaResult( 196 total_area=total_area[0], 197 atom_areas=atom_areas, 198 )
Calculate Solvent Accessible Surface Area (SASA).
Args: coords: Atom coordinates as (N, 3) array. radii: Atom radii as (N,) array. algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). n_points: Number of test points per atom (for SR algorithm). Default: 100. n_slices: Number of slices per atom (for LR algorithm). Default: 20. probe_radius: Water probe radius in Angstroms. Default: 1.4. n_threads: Number of threads to use. 0 = auto-detect. Default: 0. use_bitmask: Use bitmask LUT optimization for SR algorithm. Supports n_points 1..1024. Default: False.
Returns: SasaResult containing total_area and per-atom atom_areas.
Raises: ValueError: If input arrays have invalid shapes or calculation fails.
Example:
import numpy as np from zsasa import calculate_sasa
Two atoms
coords = np.array([[0.0, 0.0, 0.0], [3.0, 0.0, 0.0]]) radii = np.array([1.5, 1.5]) result = calculate_sasa(coords, radii) print(f"Total: {result.total_area:.2f}")
231def calculate_sasa_batch( 232 coordinates: NDArray[np.floating], 233 radii: NDArray[np.floating], 234 *, 235 algorithm: Literal["sr", "lr"] = "sr", 236 n_points: int = 100, 237 n_slices: int = 20, 238 probe_radius: float = 1.4, 239 n_threads: int = 0, 240 precision: Literal["f64", "f32"] = "f64", 241 use_bitmask: bool = False, 242) -> BatchSasaResult: 243 """Calculate SASA for multiple frames (batch processing). 244 245 Optimized for MD trajectory analysis where the same atoms are processed 246 across multiple frames. Parallelizes across frames for maximum performance. 247 248 Args: 249 coordinates: Atom coordinates as (n_frames, n_atoms, 3) array in Angstroms. 250 radii: Atom radii as (n_atoms,) array in Angstroms. 251 algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). 252 n_points: Number of test points per atom (for SR algorithm). Default: 100. 253 n_slices: Number of slices per atom (for LR algorithm). Default: 20. 254 probe_radius: Water probe radius in Angstroms. Default: 1.4. 255 n_threads: Number of threads to use. 0 = auto-detect. Default: 0. 256 precision: Internal calculation precision: "f64" (default, higher precision) 257 or "f32" (matches RustSASA/mdsasa-bolt for comparison). Default: "f64". 258 use_bitmask: Use bitmask LUT optimization for SR algorithm. 259 Supports n_points 1..1024. Default: False. 260 261 Returns: 262 BatchSasaResult containing per-atom SASA for all frames. 263 264 Raises: 265 ValueError: If input arrays have invalid shapes or calculation fails. 266 267 Example: 268 >>> import numpy as np 269 >>> from zsasa import calculate_sasa_batch 270 >>> 271 >>> # 10 frames, 100 atoms 272 >>> coords = np.random.randn(10, 100, 3).astype(np.float32) 273 >>> radii = np.full(100, 1.5, dtype=np.float32) 274 >>> result = calculate_sasa_batch(coords, radii) 275 >>> print(f"Shape: {result.atom_areas.shape}") # (10, 100) 276 >>> print(f"Total SASA per frame: {result.total_areas}") 277 """ 278 ffi, lib = _get_lib() 279 280 # Validate parameters 281 if n_points <= 0: 282 msg = f"n_points must be positive, got {n_points}" 283 raise ValueError(msg) 284 if n_slices <= 0: 285 msg = f"n_slices must be positive, got {n_slices}" 286 raise ValueError(msg) 287 if probe_radius <= 0: 288 msg = f"probe_radius must be positive, got {probe_radius}" 289 raise ValueError(msg) 290 if n_threads < 0: 291 msg = f"n_threads must be non-negative, got {n_threads}" 292 raise ValueError(msg) 293 294 # Validate bitmask constraints 295 if use_bitmask: 296 use_bitmask = _validate_bitmask_params(algorithm, n_points) 297 298 # Validate and convert inputs 299 coordinates = np.ascontiguousarray(coordinates, dtype=np.float32) 300 radii = np.ascontiguousarray(radii, dtype=np.float32) 301 302 if coordinates.ndim != 3 or coordinates.shape[2] != 3: 303 msg = f"coordinates must be (n_frames, n_atoms, 3) array, got shape {coordinates.shape}" 304 raise ValueError(msg) 305 306 n_frames = coordinates.shape[0] 307 n_atoms = coordinates.shape[1] 308 309 if radii.shape != (n_atoms,): 310 msg = f"radii must be ({n_atoms},) array, got shape {radii.shape}" 311 raise ValueError(msg) 312 313 if np.any(radii < 0): 314 msg = "All radii must be non-negative" 315 raise ValueError(msg) 316 317 # Allocate output array 318 atom_areas = np.zeros((n_frames, n_atoms), dtype=np.float32) 319 320 # Get cffi pointers from numpy arrays 321 coords_ptr = ffi.cast("float*", coordinates.ctypes.data) 322 radii_ptr = ffi.cast("float*", radii.ctypes.data) 323 areas_ptr = ffi.cast("float*", atom_areas.ctypes.data) 324 325 # Call the appropriate batch function based on algorithm, precision, and bitmask 326 if use_bitmask: 327 if precision == "f32": 328 result = lib.zsasa_calc_sr_batch_bitmask_f32( 329 coords_ptr, 330 n_frames, 331 n_atoms, 332 radii_ptr, 333 n_points, 334 probe_radius, 335 n_threads, 336 areas_ptr, 337 ) 338 else: 339 result = lib.zsasa_calc_sr_batch_bitmask( 340 coords_ptr, 341 n_frames, 342 n_atoms, 343 radii_ptr, 344 n_points, 345 probe_radius, 346 n_threads, 347 areas_ptr, 348 ) 349 elif precision == "f64": 350 # Default: f32 I/O with f64 internal precision 351 if algorithm == "sr": 352 result = lib.zsasa_calc_sr_batch( 353 coords_ptr, 354 n_frames, 355 n_atoms, 356 radii_ptr, 357 n_points, 358 probe_radius, 359 n_threads, 360 areas_ptr, 361 ) 362 elif algorithm == "lr": 363 result = lib.zsasa_calc_lr_batch( 364 coords_ptr, 365 n_frames, 366 n_atoms, 367 radii_ptr, 368 n_slices, 369 probe_radius, 370 n_threads, 371 areas_ptr, 372 ) 373 else: 374 msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'." 375 raise ValueError(msg) 376 elif precision == "f32": 377 # Pure f32 precision (matches RustSASA/mdsasa-bolt) 378 if algorithm == "sr": 379 result = lib.zsasa_calc_sr_batch_f32( 380 coords_ptr, 381 n_frames, 382 n_atoms, 383 radii_ptr, 384 n_points, 385 probe_radius, 386 n_threads, 387 areas_ptr, 388 ) 389 elif algorithm == "lr": 390 result = lib.zsasa_calc_lr_batch_f32( 391 coords_ptr, 392 n_frames, 393 n_atoms, 394 radii_ptr, 395 n_slices, 396 probe_radius, 397 n_threads, 398 areas_ptr, 399 ) 400 else: 401 msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'." 402 raise ValueError(msg) 403 else: 404 msg = f"Unknown precision: {precision}. Use 'f64' or 'f32'." 405 raise ValueError(msg) 406 407 # Check for errors 408 if result == ZSASA_ERROR_INVALID_INPUT: 409 msg = "Invalid input to batch SASA calculation" 410 raise ValueError(msg) 411 elif result == ZSASA_ERROR_OUT_OF_MEMORY: 412 msg = "Out of memory during batch SASA calculation" 413 raise MemoryError(msg) 414 elif result == ZSASA_ERROR_CALCULATION: 415 msg = "Error during batch SASA calculation" 416 raise RuntimeError(msg) 417 elif result == ZSASA_ERROR_UNSUPPORTED_N_POINTS: 418 msg = ( 419 f"Unsupported n_points for bitmask: {n_points}. " 420 f"Must be {_BITMASK_MIN_N_POINTS}..{_BITMASK_MAX_N_POINTS}" 421 ) 422 raise ValueError(msg) 423 elif result != ZSASA_OK: 424 msg = f"Unknown error code: {result}" 425 raise RuntimeError(msg) 426 427 return BatchSasaResult(atom_areas=atom_areas)
Calculate SASA for multiple frames (batch processing).
Optimized for MD trajectory analysis where the same atoms are processed across multiple frames. Parallelizes across frames for maximum performance.
Args: coordinates: Atom coordinates as (n_frames, n_atoms, 3) array in Angstroms. radii: Atom radii as (n_atoms,) array in Angstroms. algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). n_points: Number of test points per atom (for SR algorithm). Default: 100. n_slices: Number of slices per atom (for LR algorithm). Default: 20. probe_radius: Water probe radius in Angstroms. Default: 1.4. n_threads: Number of threads to use. 0 = auto-detect. Default: 0. precision: Internal calculation precision: "f64" (default, higher precision) or "f32" (matches RustSASA/mdsasa-bolt for comparison). Default: "f64". use_bitmask: Use bitmask LUT optimization for SR algorithm. Supports n_points 1..1024. Default: False.
Returns: BatchSasaResult containing per-atom SASA for all frames.
Raises: ValueError: If input arrays have invalid shapes or calculation fails.
Example:
import numpy as np from zsasa import calculate_sasa_batch
10 frames, 100 atoms
coords = np.random.randn(10, 100, 3).astype(np.float32) radii = np.full(100, 1.5, dtype=np.float32) result = calculate_sasa_batch(coords, radii) print(f"Shape: {result.atom_areas.shape}") # (10, 100) print(f"Total SASA per frame: {result.total_areas}")
25@dataclass 26class SasaResult: 27 """Result of SASA calculation. 28 29 Attributes: 30 total_area: Total solvent accessible surface area in Ų. 31 atom_areas: Per-atom SASA values in Ų. 32 """ 33 34 total_area: float 35 atom_areas: NDArray[np.float64]
Result of SASA calculation.
Attributes: total_area: Total solvent accessible surface area in Ų. atom_areas: Per-atom SASA values in Ų.
201@dataclass 202class BatchSasaResult: 203 """Result of batch SASA calculation for multiple frames. 204 205 Attributes: 206 atom_areas: Per-atom SASA values for all frames, shape (n_frames, n_atoms). 207 Values are in Angstrom² (Ų). 208 """ 209 210 atom_areas: NDArray[np.float32] 211 212 @property 213 def n_frames(self) -> int: 214 """Number of frames.""" 215 return self.atom_areas.shape[0] 216 217 @property 218 def n_atoms(self) -> int: 219 """Number of atoms.""" 220 return self.atom_areas.shape[1] 221 222 @property 223 def total_areas(self) -> NDArray[np.float32]: 224 """Total SASA per frame, shape (n_frames,).""" 225 return self.atom_areas.sum(axis=1) 226 227 def __repr__(self) -> str: 228 return f"BatchSasaResult(n_frames={self.n_frames}, n_atoms={self.n_atoms})"
Result of batch SASA calculation for multiple frames.
Attributes: atom_areas: Per-atom SASA values for all frames, shape (n_frames, n_atoms). Values are in Angstrom² (Ų).
212 @property 213 def n_frames(self) -> int: 214 """Number of frames.""" 215 return self.atom_areas.shape[0]
Number of frames.
71def process_directory( 72 input_dir: str | Path, 73 *, 74 output_dir: str | Path | None = None, 75 algorithm: Literal["sr", "lr"] = "sr", 76 n_points: int = 100, 77 n_slices: int = 20, 78 probe_radius: float = 1.4, 79 n_threads: int = 0, 80 classifier: ClassifierType | None = ClassifierType.CCD, 81 include_hydrogens: bool = False, 82 include_hetatm: bool = False, 83) -> BatchDirResult: 84 """Process all supported structure files in a directory for SASA calculation. 85 86 Supported formats: PDB (.pdb), mmCIF (.cif, .mmcif), PDB/ENT (.ent), 87 JSON (.json), and their gzip-compressed variants (.gz). 88 89 Args: 90 input_dir: Path to directory containing structure files. 91 output_dir: Optional path for per-file output. None = no file output. 92 algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). 93 n_points: Number of test points per atom (SR only; ignored for LR). 94 Default: 100. 95 n_slices: Number of slices per atom (LR only; ignored for SR). 96 Default: 20. 97 probe_radius: Water probe radius in Angstroms. Default: 1.4. 98 n_threads: Number of threads to use. 0 = auto-detect. Default: 0. 99 classifier: Classifier for radius assignment. None = use input radii. 100 Default: ClassifierType.CCD. 101 include_hydrogens: Whether to include hydrogen atoms. Default: False. 102 include_hetatm: Whether to include HETATM records. Default: False. 103 104 Returns: 105 BatchDirResult with per-file details. 106 107 Raises: 108 ValueError: If input parameters are invalid. 109 FileNotFoundError: If the input directory does not exist. 110 MemoryError: If out of memory. 111 RuntimeError: For other processing errors. 112 113 Example: 114 >>> from zsasa import process_directory 115 >>> result = process_directory("path/to/pdbs/") 116 >>> print(f"Processed {result.successful}/{result.total_files} files") 117 """ 118 ffi, lib = _get_lib() 119 120 # Validate algorithm 121 if algorithm == "sr": 122 algo_int = ZSASA_ALGORITHM_SR 123 n_points_val = n_points 124 elif algorithm == "lr": 125 algo_int = ZSASA_ALGORITHM_LR 126 n_points_val = n_slices 127 else: 128 msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'." 129 raise ValueError(msg) 130 131 if n_points <= 0: 132 msg = f"n_points must be positive, got {n_points}" 133 raise ValueError(msg) 134 if n_slices <= 0: 135 msg = f"n_slices must be positive, got {n_slices}" 136 raise ValueError(msg) 137 if probe_radius <= 0: 138 msg = f"probe_radius must be positive, got {probe_radius}" 139 raise ValueError(msg) 140 if n_threads < 0: 141 msg = f"n_threads must be non-negative, got {n_threads}" 142 raise ValueError(msg) 143 144 # Map classifier 145 classifier_int = int(classifier) if classifier is not None else -1 146 147 # Convert paths 148 input_dir_bytes = str(input_dir).encode("utf-8") 149 output_dir_bytes = str(output_dir).encode("utf-8") if output_dir is not None else ffi.NULL 150 151 error_code = ffi.new("int*") 152 153 handle = lib.zsasa_batch_dir_process( 154 input_dir_bytes, 155 output_dir_bytes, 156 algo_int, 157 n_points_val, 158 probe_radius, 159 n_threads, 160 classifier_int, 161 int(include_hydrogens), 162 int(include_hetatm), 163 error_code, 164 ) 165 166 if handle == ffi.NULL: 167 ec = error_code[0] 168 if ec == ZSASA_ERROR_INVALID_INPUT: 169 msg = "Invalid input parameters for directory batch processing" 170 raise ValueError(msg) 171 elif ec == ZSASA_ERROR_OUT_OF_MEMORY: 172 msg = "Out of memory during directory batch processing" 173 raise MemoryError(msg) 174 elif ec == ZSASA_ERROR_CALCULATION: 175 msg = "SASA calculation failed during directory batch processing" 176 raise RuntimeError(msg) 177 elif ec == ZSASA_ERROR_FILE_IO: 178 msg = f"Directory not found or not readable: {input_dir}" 179 raise FileNotFoundError(msg) 180 else: 181 msg = f"Directory batch processing failed with error code: {ec}" 182 raise RuntimeError(msg) 183 184 try: 185 total_files = lib.zsasa_batch_dir_get_total_files(handle) 186 successful = lib.zsasa_batch_dir_get_successful(handle) 187 failed = lib.zsasa_batch_dir_get_failed(handle) 188 189 filenames: list[str] = [] 190 n_atoms_list: list[int] = [] 191 total_sasa_list: list[float] = [] 192 status_list: list[int] = [] 193 194 for i in range(total_files): 195 fname_ptr = lib.zsasa_batch_dir_get_filename(handle, i) 196 if fname_ptr == ffi.NULL: 197 msg = ( 198 f"Internal error: zsasa_batch_dir_get_filename returned NULL " 199 f"for index {i} (total_files={total_files})" 200 ) 201 raise RuntimeError(msg) 202 filenames.append(ffi.string(fname_ptr).decode("utf-8")) 203 n_atoms_list.append(lib.zsasa_batch_dir_get_n_atoms(handle, i)) 204 sasa = lib.zsasa_batch_dir_get_total_sasa(handle, i) 205 total_sasa_list.append(float("nan") if math.isnan(sasa) else sasa) 206 st = lib.zsasa_batch_dir_get_status(handle, i) 207 if st not in (0, 1): 208 msg = f"Internal error: unexpected status {st} for file index {i} (expected 0 or 1)" 209 raise RuntimeError(msg) 210 status_list.append(st) 211 212 return BatchDirResult( 213 total_files=total_files, 214 successful=successful, 215 failed=failed, 216 filenames=filenames, 217 n_atoms=n_atoms_list, 218 total_sasa=total_sasa_list, 219 status=status_list, 220 ) 221 finally: 222 lib.zsasa_batch_dir_free(handle)
Process all supported structure files in a directory for SASA calculation.
Supported formats: PDB (.pdb), mmCIF (.cif, .mmcif), PDB/ENT (.ent), JSON (.json), and their gzip-compressed variants (.gz).
Args: input_dir: Path to directory containing structure files. output_dir: Optional path for per-file output. None = no file output. algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). n_points: Number of test points per atom (SR only; ignored for LR). Default: 100. n_slices: Number of slices per atom (LR only; ignored for SR). Default: 20. probe_radius: Water probe radius in Angstroms. Default: 1.4. n_threads: Number of threads to use. 0 = auto-detect. Default: 0. classifier: Classifier for radius assignment. None = use input radii. Default: ClassifierType.CCD. include_hydrogens: Whether to include hydrogen atoms. Default: False. include_hetatm: Whether to include HETATM records. Default: False.
Returns: BatchDirResult with per-file details.
Raises: ValueError: If input parameters are invalid. FileNotFoundError: If the input directory does not exist. MemoryError: If out of memory. RuntimeError: For other processing errors.
Example:
from zsasa import process_directory result = process_directory("path/to/pdbs/") print(f"Processed {result.successful}/{result.total_files} files")
23@dataclass 24class BatchDirResult: 25 """Result of directory batch processing. 26 27 Attributes: 28 total_files: Number of supported structure files found in the directory. 29 successful: Number of successfully processed files. 30 failed: Number of files that failed processing. 31 filenames: List of filenames (file name only, without directory path). 32 n_atoms: Per-file atom counts. 33 total_sasa: Per-file total SASA in Angstroms² (NaN for failed files). 34 status: Per-file status (1=ok, 0=failed). 35 """ 36 37 total_files: int 38 successful: int 39 failed: int 40 filenames: list[str] 41 n_atoms: list[int] 42 total_sasa: list[float] 43 status: list[int] 44 45 def __post_init__(self) -> None: 46 n = len(self.filenames) 47 if len(self.n_atoms) != n or len(self.total_sasa) != n or len(self.status) != n: 48 msg = ( 49 f"All per-file lists must have the same length as filenames ({n}), " 50 f"got n_atoms={len(self.n_atoms)}, total_sasa={len(self.total_sasa)}, " 51 f"status={len(self.status)}" 52 ) 53 raise ValueError(msg) 54 if self.total_files != n: 55 msg = f"total_files ({self.total_files}) != len(filenames) ({n})" 56 raise ValueError(msg) 57 if self.successful + self.failed != self.total_files: 58 msg = ( 59 f"successful ({self.successful}) + failed ({self.failed}) " 60 f"!= total_files ({self.total_files})" 61 ) 62 raise ValueError(msg) 63 64 def __repr__(self) -> str: 65 return ( 66 f"BatchDirResult(total_files={self.total_files}, " 67 f"successful={self.successful}, failed={self.failed})" 68 )
Result of directory batch processing.
Attributes: total_files: Number of supported structure files found in the directory. successful: Number of successfully processed files. failed: Number of files that failed processing. filenames: List of filenames (file name only, without directory path). n_atoms: Per-file atom counts. total_sasa: Per-file total SASA in Angstroms² (NaN for failed files). status: Per-file status (1=ok, 0=failed).
26class ClassifierType(IntEnum): 27 """Available classifier types for atom radius assignment. 28 29 Attributes: 30 CCD: CCD-based radii (default). Hardcoded ProtOr radii + runtime CCD 31 analysis for non-standard residues via bond topology. 32 PROTOR: Alias for CCD. Kept for backward compatibility. 33 NACCESS: NACCESS-compatible radii. 34 OONS: Ooi, Oobatake, Nemethy, Scheraga radii. 35 """ 36 37 NACCESS = ZSASA_CLASSIFIER_NACCESS 38 PROTOR = ZSASA_CLASSIFIER_PROTOR 39 OONS = ZSASA_CLASSIFIER_OONS 40 CCD = ZSASA_CLASSIFIER_CCD
Available classifier types for atom radius assignment.
Attributes: CCD: CCD-based radii (default). Hardcoded ProtOr radii + runtime CCD analysis for non-standard residues via bond topology. PROTOR: Alias for CCD. Kept for backward compatibility. NACCESS: NACCESS-compatible radii. OONS: Ooi, Oobatake, Nemethy, Scheraga radii.
43class AtomClass(IntEnum): 44 """Atom polarity classes. 45 46 Attributes: 47 POLAR: Polar atoms (e.g., N, O). 48 APOLAR: Apolar/hydrophobic atoms (e.g., C). 49 UNKNOWN: Unknown classification. 50 """ 51 52 POLAR = ZSASA_ATOM_CLASS_POLAR 53 APOLAR = ZSASA_ATOM_CLASS_APOLAR 54 UNKNOWN = ZSASA_ATOM_CLASS_UNKNOWN
Atom polarity classes.
Attributes: POLAR: Polar atoms (e.g., N, O). APOLAR: Apolar/hydrophobic atoms (e.g., C). UNKNOWN: Unknown classification.
173@dataclass 174class ClassificationResult: 175 """Result of batch atom classification. 176 177 Attributes: 178 radii: Per-atom van der Waals radii in Angstroms. 179 NaN values indicate atoms not found in the classifier. 180 Use np.isnan(result.radii) to find unknown atoms. 181 classes: Per-atom polarity classes (AtomClass constants). 182 183 Example: 184 >>> result = classify_atoms(residues, atoms) 185 >>> unknown_mask = np.isnan(result.radii) 186 >>> if unknown_mask.any(): 187 ... print(f"Found {unknown_mask.sum()} unknown atoms") 188 """ 189 190 radii: NDArray[np.float64] 191 classes: NDArray[np.int32] 192 193 def __repr__(self) -> str: 194 return f"ClassificationResult(n_atoms={len(self.radii)})"
Result of batch atom classification.
Attributes: radii: Per-atom van der Waals radii in Angstroms. NaN values indicate atoms not found in the classifier. Use np.isnan(result.radii) to find unknown atoms. classes: Per-atom polarity classes (AtomClass constants).
Example:
result = classify_atoms(residues, atoms) unknown_mask = np.isnan(result.radii) if unknown_mask.any(): ... print(f"Found {unknown_mask.sum()} unknown atoms")
57def get_radius( 58 residue: str, 59 atom: str, 60 classifier_type: ClassifierType = ClassifierType.CCD, 61) -> float | None: 62 """Get van der Waals radius for an atom using the specified classifier. 63 64 Args: 65 residue: Residue name (e.g., "ALA", "GLY"). 66 atom: Atom name (e.g., "CA", "CB"). 67 classifier_type: Classifier to use. Default: ClassifierType.CCD. 68 69 Returns: 70 Radius in Angstroms, or None if atom is not found in classifier. 71 72 Example: 73 >>> from zsasa import get_radius, ClassifierType 74 >>> get_radius("ALA", "CA") 75 1.87 76 >>> get_radius("ALA", "XX") # Unknown atom 77 None 78 """ 79 _, lib = _get_lib() 80 radius = lib.zsasa_classifier_get_radius( 81 classifier_type, 82 residue.encode("utf-8"), 83 atom.encode("utf-8"), 84 ) 85 if np.isnan(radius): 86 return None 87 return radius
Get van der Waals radius for an atom using the specified classifier.
Args: residue: Residue name (e.g., "ALA", "GLY"). atom: Atom name (e.g., "CA", "CB"). classifier_type: Classifier to use. Default: ClassifierType.CCD.
Returns: Radius in Angstroms, or None if atom is not found in classifier.
Example:
from zsasa import get_radius, ClassifierType get_radius("ALA", "CA") 1.87 get_radius("ALA", "XX") # Unknown atom None
90def get_atom_class( 91 residue: str, 92 atom: str, 93 classifier_type: ClassifierType = ClassifierType.CCD, 94) -> int: 95 """Get atom polarity class using the specified classifier. 96 97 Args: 98 residue: Residue name (e.g., "ALA", "GLY"). 99 atom: Atom name (e.g., "CA", "CB"). 100 classifier_type: Classifier to use. Default: ClassifierType.CCD. 101 102 Returns: 103 AtomClass constant (POLAR, APOLAR, or UNKNOWN). 104 105 Example: 106 >>> from zsasa import get_atom_class, AtomClass 107 >>> get_atom_class("ALA", "CA") == AtomClass.APOLAR 108 True 109 >>> get_atom_class("ALA", "O") == AtomClass.POLAR 110 True 111 """ 112 _, lib = _get_lib() 113 return lib.zsasa_classifier_get_class( 114 classifier_type, 115 residue.encode("utf-8"), 116 atom.encode("utf-8"), 117 )
Get atom polarity class using the specified classifier.
Args: residue: Residue name (e.g., "ALA", "GLY"). atom: Atom name (e.g., "CA", "CB"). classifier_type: Classifier to use. Default: ClassifierType.CCD.
Returns: AtomClass constant (POLAR, APOLAR, or UNKNOWN).
Example:
from zsasa import get_atom_class, AtomClass get_atom_class("ALA", "CA") == AtomClass.APOLAR True get_atom_class("ALA", "O") == AtomClass.POLAR True
120def guess_radius(element: str) -> float | None: 121 """Guess van der Waals radius from element symbol. 122 123 Args: 124 element: Element symbol (e.g., "C", "N", "FE"). 125 Case-insensitive, whitespace is trimmed. 126 127 Returns: 128 Radius in Angstroms, or None if element is not recognized. 129 130 Example: 131 >>> from zsasa import guess_radius 132 >>> guess_radius("C") 133 1.7 134 >>> guess_radius("FE") 135 1.26 136 >>> guess_radius("XX") # Unknown 137 None 138 """ 139 _, lib = _get_lib() 140 radius = lib.zsasa_guess_radius(element.encode("utf-8")) 141 if np.isnan(radius): 142 return None 143 return radius
Guess van der Waals radius from element symbol.
Args: element: Element symbol (e.g., "C", "N", "FE"). Case-insensitive, whitespace is trimmed.
Returns: Radius in Angstroms, or None if element is not recognized.
Example:
from zsasa import guess_radius guess_radius("C") 1.7 guess_radius("FE") 1.26 guess_radius("XX") # Unknown None
146def guess_radius_from_atom_name(atom_name: str) -> float | None: 147 """Guess van der Waals radius from PDB-style atom name. 148 149 Extracts element symbol from atom name following PDB conventions: 150 - Leading space indicates single-char element (e.g., " CA " = Carbon alpha) 151 - No leading space may indicate 2-char element (e.g., "FE " = Iron) 152 153 Args: 154 atom_name: PDB-style atom name (e.g., " CA ", "FE "). 155 156 Returns: 157 Radius in Angstroms, or None if element cannot be determined. 158 159 Example: 160 >>> from zsasa import guess_radius_from_atom_name 161 >>> guess_radius_from_atom_name(" CA ") # Carbon alpha 162 1.7 163 >>> guess_radius_from_atom_name("FE ") # Iron 164 1.26 165 """ 166 _, lib = _get_lib() 167 radius = lib.zsasa_guess_radius_from_atom_name(atom_name.encode("utf-8")) 168 if np.isnan(radius): 169 return None 170 return radius
Guess van der Waals radius from PDB-style atom name.
Extracts element symbol from atom name following PDB conventions:
- Leading space indicates single-char element (e.g., " CA " = Carbon alpha)
- No leading space may indicate 2-char element (e.g., "FE " = Iron)
Args: atom_name: PDB-style atom name (e.g., " CA ", "FE ").
Returns: Radius in Angstroms, or None if element cannot be determined.
Example:
from zsasa import guess_radius_from_atom_name guess_radius_from_atom_name(" CA ") # Carbon alpha 1.7 guess_radius_from_atom_name("FE ") # Iron 1.26
197def classify_atoms( 198 residues: list[str], 199 atoms: list[str], 200 classifier_type: ClassifierType = ClassifierType.CCD, 201 *, 202 include_classes: bool = True, 203) -> ClassificationResult: 204 """Classify multiple atoms at once (batch operation). 205 206 This is more efficient than calling get_radius for each atom individually. 207 208 Args: 209 residues: List of residue names. 210 atoms: List of atom names (must be same length as residues). 211 classifier_type: Classifier to use. Default: ClassifierType.CCD. 212 include_classes: Whether to compute atom classes. Default: True. 213 214 Returns: 215 ClassificationResult with radii and classes arrays. 216 Unknown atoms have NaN radius and UNKNOWN class. 217 218 Raises: 219 ValueError: If residues and atoms have different lengths. 220 221 Example: 222 >>> from zsasa import classify_atoms 223 >>> result = classify_atoms( 224 ... ["ALA", "ALA", "GLY"], 225 ... ["CA", "O", "N"], 226 ... ) 227 >>> result.radii 228 array([1.87, 1.4 , 1.65]) 229 """ 230 ffi, lib = _get_lib() 231 232 if len(residues) != len(atoms): 233 msg = f"residues and atoms must have same length: {len(residues)} != {len(atoms)}" 234 raise ValueError(msg) 235 236 n_atoms = len(residues) 237 if n_atoms == 0: 238 return ClassificationResult( 239 radii=np.array([], dtype=np.float64), 240 classes=np.array([], dtype=np.int32), 241 ) 242 243 # Encode strings and create cffi arrays 244 # Keep references to prevent garbage collection 245 residues_bytes = [ffi.new("char[]", r.encode("utf-8")) for r in residues] 246 atoms_bytes = [ffi.new("char[]", a.encode("utf-8")) for a in atoms] 247 248 residues_arr = ffi.new("char*[]", residues_bytes) 249 atoms_arr = ffi.new("char*[]", atoms_bytes) 250 251 # Allocate output arrays 252 radii = np.zeros(n_atoms, dtype=np.float64) 253 classes = np.zeros(n_atoms, dtype=np.int32) if include_classes else None 254 255 radii_ptr = ffi.cast("double*", radii.ctypes.data) 256 classes_ptr = ffi.cast("int*", classes.ctypes.data) if include_classes else ffi.NULL 257 258 result = lib.zsasa_classify_atoms( 259 classifier_type, 260 residues_arr, 261 atoms_arr, 262 n_atoms, 263 radii_ptr, 264 classes_ptr, 265 ) 266 267 if result == ZSASA_ERROR_INVALID_INPUT: 268 msg = f"Invalid classifier type: {classifier_type}" 269 raise ValueError(msg) 270 elif result != ZSASA_OK: 271 msg = f"Classification error: {result}" 272 raise RuntimeError(msg) 273 274 if not include_classes: 275 classes = np.full(n_atoms, AtomClass.UNKNOWN, dtype=np.int32) 276 277 return ClassificationResult(radii=radii, classes=classes)
Classify multiple atoms at once (batch operation).
This is more efficient than calling get_radius for each atom individually.
Args: residues: List of residue names. atoms: List of atom names (must be same length as residues). classifier_type: Classifier to use. Default: ClassifierType.CCD. include_classes: Whether to compute atom classes. Default: True.
Returns: ClassificationResult with radii and classes arrays. Unknown atoms have NaN radius and UNKNOWN class.
Raises: ValueError: If residues and atoms have different lengths.
Example:
from zsasa import classify_atoms result = classify_atoms( ... ["ALA", "ALA", "GLY"], ... ["CA", "O", "N"], ... ) result.radii array([1.87, 1.4 , 1.65])
36def get_max_sasa(residue_name: str) -> float | None: 37 """Get maximum SASA value for a standard amino acid. 38 39 Values from Tien et al. (2013) "Maximum allowed solvent accessibilities 40 of residues in proteins". 41 42 Args: 43 residue_name: 3-letter residue code (e.g., "ALA", "GLY"). 44 45 Returns: 46 Maximum SASA in Angstroms², or None if residue is not a standard amino acid. 47 48 Example: 49 >>> from zsasa import get_max_sasa 50 >>> get_max_sasa("ALA") 51 129.0 52 >>> get_max_sasa("TRP") 53 285.0 54 >>> get_max_sasa("HOH") # Water - not a standard amino acid 55 None 56 """ 57 _, lib = _get_lib() 58 max_sasa = lib.zsasa_get_max_sasa(residue_name.encode("utf-8")) 59 if np.isnan(max_sasa): 60 return None 61 return max_sasa
Get maximum SASA value for a standard amino acid.
Values from Tien et al. (2013) "Maximum allowed solvent accessibilities of residues in proteins".
Args: residue_name: 3-letter residue code (e.g., "ALA", "GLY").
Returns: Maximum SASA in Angstroms², or None if residue is not a standard amino acid.
Example:
from zsasa import get_max_sasa get_max_sasa("ALA") 129.0 get_max_sasa("TRP") 285.0 get_max_sasa("HOH") # Water - not a standard amino acid None
64def calculate_rsa(sasa: float, residue_name: str) -> float | None: 65 """Calculate RSA (Relative Solvent Accessibility) for a single residue. 66 67 RSA = SASA / MaxSASA 68 69 Args: 70 sasa: Observed SASA value in Angstroms². 71 residue_name: 3-letter residue code (e.g., "ALA", "GLY"). 72 73 Returns: 74 RSA value (typically 0.0-1.0), or None if residue is not a standard amino acid. 75 Note: RSA > 1.0 is possible for exposed terminal residues. 76 77 Example: 78 >>> from zsasa import calculate_rsa 79 >>> calculate_rsa(64.5, "ALA") # 64.5 / 129.0 = 0.5 80 0.5 81 >>> calculate_rsa(150.0, "GLY") # RSA > 1.0 is possible 82 1.4423076923076923 83 """ 84 _, lib = _get_lib() 85 rsa = lib.zsasa_calculate_rsa(sasa, residue_name.encode("utf-8")) 86 if np.isnan(rsa): 87 return None 88 return rsa
Calculate RSA (Relative Solvent Accessibility) for a single residue.
RSA = SASA / MaxSASA
Args: sasa: Observed SASA value in Angstroms². residue_name: 3-letter residue code (e.g., "ALA", "GLY").
Returns: RSA value (typically 0.0-1.0), or None if residue is not a standard amino acid. Note: RSA > 1.0 is possible for exposed terminal residues.
Example:
from zsasa import calculate_rsa calculate_rsa(64.5, "ALA") # 64.5 / 129.0 = 0.5 0.5 calculate_rsa(150.0, "GLY") # RSA > 1.0 is possible 1.4423076923076923
91def calculate_rsa_batch( 92 sasas: NDArray[np.float64] | list[float], 93 residue_names: list[str], 94) -> NDArray[np.float64]: 95 """Calculate RSA for multiple residues at once (batch operation). 96 97 This is more efficient than calling calculate_rsa for each residue individually. 98 99 Args: 100 sasas: Array of SASA values in Angstroms². 101 residue_names: List of 3-letter residue codes (must be same length as sasas). 102 103 Returns: 104 Array of RSA values. NaN values indicate non-standard amino acids. 105 106 Raises: 107 ValueError: If sasas and residue_names have different lengths. 108 109 Example: 110 >>> import numpy as np 111 >>> from zsasa import calculate_rsa_batch 112 >>> sasas = np.array([64.5, 52.0, 100.0]) 113 >>> residues = ["ALA", "GLY", "HOH"] # HOH is not standard 114 >>> rsa = calculate_rsa_batch(sasas, residues) 115 >>> rsa 116 array([0.5 , 0.5 , nan]) 117 """ 118 ffi, lib = _get_lib() 119 120 sasas = np.ascontiguousarray(sasas, dtype=np.float64) 121 n_residues = len(sasas) 122 123 if len(residue_names) != n_residues: 124 msg = f"sasas and residue_names must have same length: {n_residues} != {len(residue_names)}" 125 raise ValueError(msg) 126 127 if n_residues == 0: 128 return np.array([], dtype=np.float64) 129 130 # Encode strings and create cffi array 131 # Keep references to prevent garbage collection 132 residues_bytes = [ffi.new("char[]", r.encode("utf-8")) for r in residue_names] 133 residues_arr = ffi.new("char*[]", residues_bytes) 134 135 # Allocate output array 136 rsa_out = np.zeros(n_residues, dtype=np.float64) 137 138 sasas_ptr = ffi.cast("double*", sasas.ctypes.data) 139 rsa_ptr = ffi.cast("double*", rsa_out.ctypes.data) 140 141 result = lib.zsasa_calculate_rsa_batch(sasas_ptr, residues_arr, n_residues, rsa_ptr) 142 if result != ZSASA_OK: 143 msg = f"RSA batch calculation failed with error code: {result}" 144 raise RuntimeError(msg) 145 146 return rsa_out
Calculate RSA for multiple residues at once (batch operation).
This is more efficient than calling calculate_rsa for each residue individually.
Args: sasas: Array of SASA values in Angstroms². residue_names: List of 3-letter residue codes (must be same length as sasas).
Returns: Array of RSA values. NaN values indicate non-standard amino acids.
Raises: ValueError: If sasas and residue_names have different lengths.
Example:
import numpy as np from zsasa import calculate_rsa_batch sasas = np.array([64.5, 52.0, 100.0]) residues = ["ALA", "GLY", "HOH"] # HOH is not standard rsa = calculate_rsa_batch(sasas, residues) rsa array([0.5 , 0.5 , nan])
41@dataclass 42class ResidueResult: 43 """Per-residue SASA result. 44 45 Attributes: 46 chain_id: Chain identifier (e.g., "A", "B"). 47 residue_id: Residue sequence number. 48 residue_name: 3-letter residue name (e.g., "ALA", "GLY"). 49 total_area: Total SASA for this residue in A^2. 50 polar_area: Polar SASA (N, O atoms, etc.) in A^2. 51 apolar_area: Apolar SASA (C atoms, etc.) in A^2. 52 rsa: Relative Solvent Accessibility (0.0-1.0+), or None for 53 non-standard amino acids. 54 n_atoms: Number of atoms in this residue. 55 """ 56 57 chain_id: str 58 residue_id: int 59 residue_name: str 60 total_area: float 61 polar_area: float 62 apolar_area: float 63 rsa: float | None 64 n_atoms: int 65 66 def __repr__(self) -> str: 67 rsa_str = f"{self.rsa:.3f}" if self.rsa is not None else "None" 68 return ( 69 f"ResidueResult({self.chain_id}:{self.residue_name}{self.residue_id}, " 70 f"total={self.total_area:.1f}, rsa={rsa_str}, n_atoms={self.n_atoms})" 71 )
Per-residue SASA result.
Attributes: chain_id: Chain identifier (e.g., "A", "B"). residue_id: Residue sequence number. residue_name: 3-letter residue name (e.g., "ALA", "GLY"). total_area: Total SASA for this residue in A^2. polar_area: Polar SASA (N, O atoms, etc.) in A^2. apolar_area: Apolar SASA (C atoms, etc.) in A^2. rsa: Relative Solvent Accessibility (0.0-1.0+), or None for non-standard amino acids. n_atoms: Number of atoms in this residue.
74def aggregate_by_residue( 75 atom_areas: NDArray[np.float64], 76 chain_ids: list[str], 77 residue_ids: list[int], 78 residue_names: list[str], 79 atom_classes: NDArray[np.int32] | None = None, 80) -> list[ResidueResult]: 81 """Aggregate per-atom SASA values to per-residue. 82 83 Groups atoms by (chain_id, residue_id) and sums their SASA values. 84 Also calculates polar/apolar breakdown and RSA if atom classes are provided. 85 86 Args: 87 atom_areas: Per-atom SASA values in A^2. 88 chain_ids: Chain ID for each atom. 89 residue_ids: Residue sequence number for each atom. 90 residue_names: Residue name for each atom. 91 atom_classes: Optional per-atom polarity classes (AtomClass values). 92 If provided, polar_area and apolar_area will be calculated. 93 94 Returns: 95 List of ResidueResult objects, one per unique residue. 96 Results are ordered by appearance in the input (preserves chain/residue order). 97 98 Example: 99 >>> import numpy as np 100 >>> from zsasa.analysis import aggregate_by_residue 101 >>> 102 >>> atom_areas = np.array([10.0, 20.0, 15.0, 25.0]) 103 >>> chain_ids = ["A", "A", "A", "A"] 104 >>> residue_ids = [1, 1, 2, 2] 105 >>> residue_names = ["ALA", "ALA", "GLY", "GLY"] 106 >>> 107 >>> residues = aggregate_by_residue( 108 ... atom_areas, chain_ids, residue_ids, residue_names 109 ... ) 110 >>> len(residues) 111 2 112 >>> residues[0].total_area 113 30.0 114 """ 115 n_atoms = len(atom_areas) 116 if n_atoms == 0: 117 return [] 118 119 # Validate input lengths 120 if len(chain_ids) != n_atoms: 121 msg = f"chain_ids length ({len(chain_ids)}) != atom_areas length ({n_atoms})" 122 raise ValueError(msg) 123 if len(residue_ids) != n_atoms: 124 msg = f"residue_ids length ({len(residue_ids)}) != atom_areas length ({n_atoms})" 125 raise ValueError(msg) 126 if len(residue_names) != n_atoms: 127 msg = f"residue_names length ({len(residue_names)}) != atom_areas length ({n_atoms})" 128 raise ValueError(msg) 129 if atom_classes is not None and len(atom_classes) != n_atoms: 130 msg = f"atom_classes length ({len(atom_classes)}) != atom_areas length ({n_atoms})" 131 raise ValueError(msg) 132 133 # Group atoms by (chain_id, residue_id) 134 # Use dict to preserve insertion order (Python 3.7+) 135 residue_data: dict[tuple[str, int], dict] = {} 136 137 for i in range(n_atoms): 138 key = (chain_ids[i], residue_ids[i]) 139 140 if key not in residue_data: 141 residue_data[key] = { 142 "residue_name": residue_names[i], 143 "total_area": 0.0, 144 "polar_area": 0.0, 145 "apolar_area": 0.0, 146 "n_atoms": 0, 147 } 148 149 data = residue_data[key] 150 area = float(atom_areas[i]) 151 data["total_area"] += area 152 data["n_atoms"] += 1 153 154 if atom_classes is not None: 155 atom_class = atom_classes[i] 156 if atom_class == AtomClass.POLAR: 157 data["polar_area"] += area 158 elif atom_class == AtomClass.APOLAR: 159 data["apolar_area"] += area 160 161 # Build result list 162 results = [] 163 for (chain_id, residue_id), data in residue_data.items(): 164 residue_name = data["residue_name"] 165 total_area = data["total_area"] 166 167 # Calculate RSA if this is a standard amino acid 168 max_sasa = MAX_SASA.get(residue_name) 169 rsa = total_area / max_sasa if max_sasa is not None else None 170 171 results.append( 172 ResidueResult( 173 chain_id=chain_id, 174 residue_id=residue_id, 175 residue_name=residue_name, 176 total_area=total_area, 177 polar_area=data["polar_area"], 178 apolar_area=data["apolar_area"], 179 rsa=rsa, 180 n_atoms=data["n_atoms"], 181 ) 182 ) 183 184 return results
Aggregate per-atom SASA values to per-residue.
Groups atoms by (chain_id, residue_id) and sums their SASA values. Also calculates polar/apolar breakdown and RSA if atom classes are provided.
Args: atom_areas: Per-atom SASA values in A^2. chain_ids: Chain ID for each atom. residue_ids: Residue sequence number for each atom. residue_names: Residue name for each atom. atom_classes: Optional per-atom polarity classes (AtomClass values). If provided, polar_area and apolar_area will be calculated.
Returns: List of ResidueResult objects, one per unique residue. Results are ordered by appearance in the input (preserves chain/residue order).
Example:
import numpy as np from zsasa.analysis import aggregate_by_residue
atom_areas = np.array([10.0, 20.0, 15.0, 25.0]) chain_ids = ["A", "A", "A", "A"] residue_ids = [1, 1, 2, 2] residue_names = ["ALA", "ALA", "GLY", "GLY"]
residues = aggregate_by_residue( ... atom_areas, chain_ids, residue_ids, residue_names ... ) len(residues) 2 residues[0].total_area 30.0
187def aggregate_from_result(result: SasaResultWithAtoms) -> list[ResidueResult]: 188 """Aggregate per-atom SASA from a SasaResultWithAtoms to per-residue. 189 190 This is a convenience wrapper that extracts the necessary data from 191 a SasaResultWithAtoms object returned by gemmi integration functions. 192 193 Args: 194 result: A SasaResultWithAtoms object from calculate_sasa_from_structure 195 or calculate_sasa_from_model. 196 197 Returns: 198 List of ResidueResult objects, one per unique residue. 199 200 Example: 201 >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure 202 >>> from zsasa.analysis import aggregate_from_result 203 >>> 204 >>> result = calculate_sasa_from_structure("protein.cif") 205 >>> residues = aggregate_from_result(result) 206 >>> 207 >>> # Print buried residues (RSA < 0.25) 208 >>> for res in residues: 209 ... if res.rsa is not None and res.rsa < 0.25: 210 ... print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: {res.rsa:.1%}") 211 """ 212 return aggregate_by_residue( 213 atom_areas=result.atom_areas, 214 chain_ids=result.atom_data.chain_ids, 215 residue_ids=result.atom_data.residue_ids, 216 residue_names=result.atom_data.residue_names, 217 atom_classes=result.atom_classes, 218 )
Aggregate per-atom SASA from a SasaResultWithAtoms to per-residue.
This is a convenience wrapper that extracts the necessary data from a SasaResultWithAtoms object returned by gemmi integration functions.
Args: result: A SasaResultWithAtoms object from calculate_sasa_from_structure or calculate_sasa_from_model.
Returns: List of ResidueResult objects, one per unique residue.
Example:
from zsasa.integrations.gemmi import calculate_sasa_from_structure from zsasa.analysis import aggregate_from_result
result = calculate_sasa_from_structure("protein.cif") residues = aggregate_from_result(result)
Print buried residues (RSA < 0.25)
for res in residues: ... if res.rsa is not None and res.rsa < 0.25: ... print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: {res.rsa:.1%}")
255def get_version() -> str: 256 """Get the library version string.""" 257 ffi, lib = _get_lib() 258 return ffi.string(lib.zsasa_version()).decode("utf-8")
Get the library version string.