zsasa
zsasa: Fast SASA calculation using Zig.
This package provides Python bindings for the zsasa library, a high-performance implementation of Solvent Accessible Surface Area (SASA) calculation algorithms.
Example:
import numpy as np from zsasa import calculate_sasa
Single atom
coords = np.array([[0.0, 0.0, 0.0]]) radii = np.array([1.5]) result = calculate_sasa(coords, radii) print(f"Total SASA: {result.total_area:.2f} Ų")
>>> # Classify atoms >>> from zsasa import classify_atoms, get_radius >>> result = classify_atoms(["ALA", "ALA"], ["CA", "O"]) >>> print(result.radii) # [1.87, 1.4]Integrations: For structure file support, use the gemmi integration:
>>> # pip install zsasa[gemmi] >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure >>> result = calculate_sasa_from_structure("protein.cif") >>> print(f"Total: {result.total_area:.1f} Ų")Analysis: For per-residue aggregation and RSA calculation:
>>> from zsasa import aggregate_from_result >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure >>> result = calculate_sasa_from_structure("protein.cif") >>> residues = aggregate_from_result(result) >>> for res in residues: ... if res.rsa is not None: ... print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: RSA={res.rsa:.1%}")MDTraj Integration: For MD trajectory analysis (requires mdtraj):
>>> # pip install mdtraj >>> from zsasa.mdtraj import compute_sasa >>> import mdtraj as md >>> traj = md.load('trajectory.xtc', top='topology.pdb') >>> sasa = compute_sasa(traj) # Returns (n_frames, n_atoms) in nm²MDAnalysis Integration: For MD trajectory analysis with MDAnalysis (requires MDAnalysis):
>>> # pip install MDAnalysis >>> import MDAnalysis as mda >>> from zsasa.mdanalysis import SASAAnalysis >>> u = mda.Universe('topology.pdb', 'trajectory.xtc') >>> sasa = SASAAnalysis(u, select='protein') >>> sasa.run() >>> print(sasa.results.total_area) # Returns per-frame SASA in ŲNative XTC Reader: For standalone XTC reading without MDTraj/MDAnalysis dependencies:
>>> from zsasa.xtc import XtcReader, compute_sasa_trajectory >>> # Low-level reader API >>> with XtcReader("trajectory.xtc") as reader: ... for frame in reader: ... print(f"Step {frame.step}") >>> >>> # High-level SASA calculation (radii from topology) >>> result = compute_sasa_trajectory("trajectory.xtc", radii) >>> print(result.total_areas) # Per-frame SASA in ŲNative DCD Reader: For standalone DCD reading (NAMD/CHARMM format):
>>> from zsasa.dcd import DcdReader, compute_sasa_trajectory >>> with DcdReader("trajectory.dcd") as reader: ... for frame in reader: ... print(f"Step {frame.step}, {frame.natoms} atoms") >>> >>> result = compute_sasa_trajectory("trajectory.dcd", radii) >>> print(result.total_areas) # Per-frame SASA in Ų
1"""zsasa: Fast SASA calculation using Zig. 2 3This package provides Python bindings for the zsasa library, 4a high-performance implementation of Solvent Accessible Surface Area (SASA) 5calculation algorithms. 6 7Example: 8 >>> import numpy as np 9 >>> from zsasa import calculate_sasa 10 >>> 11 >>> # Single atom 12 >>> coords = np.array([[0.0, 0.0, 0.0]]) 13 >>> radii = np.array([1.5]) 14 >>> result = calculate_sasa(coords, radii) 15 >>> print(f"Total SASA: {result.total_area:.2f} Ų") 16 17 >>> # Classify atoms 18 >>> from zsasa import classify_atoms, get_radius 19 >>> result = classify_atoms(["ALA", "ALA"], ["CA", "O"]) 20 >>> print(result.radii) # [1.87, 1.4] 21 22Integrations: 23 For structure file support, use the gemmi integration: 24 25 >>> # pip install zsasa[gemmi] 26 >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure 27 >>> result = calculate_sasa_from_structure("protein.cif") 28 >>> print(f"Total: {result.total_area:.1f} Ų") 29 30Analysis: 31 For per-residue aggregation and RSA calculation: 32 33 >>> from zsasa import aggregate_from_result 34 >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure 35 >>> result = calculate_sasa_from_structure("protein.cif") 36 >>> residues = aggregate_from_result(result) 37 >>> for res in residues: 38 ... if res.rsa is not None: 39 ... print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: RSA={res.rsa:.1%}") 40 41MDTraj Integration: 42 For MD trajectory analysis (requires mdtraj): 43 44 >>> # pip install mdtraj 45 >>> from zsasa.mdtraj import compute_sasa 46 >>> import mdtraj as md 47 >>> traj = md.load('trajectory.xtc', top='topology.pdb') 48 >>> sasa = compute_sasa(traj) # Returns (n_frames, n_atoms) in nm² 49 50MDAnalysis Integration: 51 For MD trajectory analysis with MDAnalysis (requires MDAnalysis): 52 53 >>> # pip install MDAnalysis 54 >>> import MDAnalysis as mda 55 >>> from zsasa.mdanalysis import SASAAnalysis 56 >>> u = mda.Universe('topology.pdb', 'trajectory.xtc') 57 >>> sasa = SASAAnalysis(u, select='protein') 58 >>> sasa.run() 59 >>> print(sasa.results.total_area) # Returns per-frame SASA in Ų 60 61Native XTC Reader: 62 For standalone XTC reading without MDTraj/MDAnalysis dependencies: 63 64 >>> from zsasa.xtc import XtcReader, compute_sasa_trajectory 65 >>> # Low-level reader API 66 >>> with XtcReader("trajectory.xtc") as reader: 67 ... for frame in reader: 68 ... print(f"Step {frame.step}") 69 >>> 70 >>> # High-level SASA calculation (radii from topology) 71 >>> result = compute_sasa_trajectory("trajectory.xtc", radii) 72 >>> print(result.total_areas) # Per-frame SASA in Ų 73 74Native DCD Reader: 75 For standalone DCD reading (NAMD/CHARMM format): 76 77 >>> from zsasa.dcd import DcdReader, compute_sasa_trajectory 78 >>> with DcdReader("trajectory.dcd") as reader: 79 ... for frame in reader: 80 ... print(f"Step {frame.step}, {frame.natoms} atoms") 81 >>> 82 >>> result = compute_sasa_trajectory("trajectory.dcd", radii) 83 >>> print(result.total_areas) # Per-frame SASA in Ų 84""" 85 86from zsasa.analysis import ( 87 ResidueResult, 88 aggregate_by_residue, 89 aggregate_from_result, 90) 91from zsasa.core import ( 92 MAX_SASA, 93 AtomClass, 94 BatchDirResult, 95 BatchSasaResult, 96 ClassificationResult, 97 ClassifierType, 98 SasaResult, 99 calculate_rsa, 100 calculate_rsa_batch, 101 calculate_sasa, 102 calculate_sasa_batch, 103 classify_atoms, 104 get_atom_class, 105 get_max_sasa, 106 get_radius, 107 get_version, 108 guess_radius, 109 guess_radius_from_atom_name, 110 process_directory, 111) 112 113__all__ = [ 114 # SASA calculation 115 "calculate_sasa", 116 "calculate_sasa_batch", 117 "SasaResult", 118 "BatchSasaResult", 119 # Batch directory processing 120 "process_directory", 121 "BatchDirResult", 122 # Classifier 123 "ClassifierType", 124 "AtomClass", 125 "ClassificationResult", 126 "get_radius", 127 "get_atom_class", 128 "guess_radius", 129 "guess_radius_from_atom_name", 130 "classify_atoms", 131 # RSA 132 "MAX_SASA", 133 "get_max_sasa", 134 "calculate_rsa", 135 "calculate_rsa_batch", 136 # Analysis 137 "ResidueResult", 138 "aggregate_by_residue", 139 "aggregate_from_result", 140 # Utility 141 "get_version", 142] 143 144__version__ = get_version()
278def calculate_sasa( 279 coords: NDArray[np.float64], 280 radii: NDArray[np.float64], 281 *, 282 algorithm: Literal["sr", "lr"] = "sr", 283 n_points: int = 100, 284 n_slices: int = 20, 285 probe_radius: float = 1.4, 286 n_threads: int = 0, 287 use_bitmask: bool = False, 288) -> SasaResult: 289 """Calculate Solvent Accessible Surface Area (SASA). 290 291 Args: 292 coords: Atom coordinates as (N, 3) array. 293 radii: Atom radii as (N,) array. 294 algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). 295 n_points: Number of test points per atom (for SR algorithm). Default: 100. 296 n_slices: Number of slices per atom (for LR algorithm). Default: 20. 297 probe_radius: Water probe radius in Angstroms. Default: 1.4. 298 n_threads: Number of threads to use. 0 = auto-detect. Default: 0. 299 use_bitmask: Use bitmask LUT optimization for SR algorithm. 300 Supports n_points 1..1024. Default: False. 301 302 Returns: 303 SasaResult containing total_area and per-atom atom_areas. 304 305 Raises: 306 ValueError: If input arrays have invalid shapes or calculation fails. 307 308 Example: 309 >>> import numpy as np 310 >>> from zsasa import calculate_sasa 311 >>> 312 >>> # Two atoms 313 >>> coords = np.array([[0.0, 0.0, 0.0], [3.0, 0.0, 0.0]]) 314 >>> radii = np.array([1.5, 1.5]) 315 >>> result = calculate_sasa(coords, radii) 316 >>> print(f"Total: {result.total_area:.2f}") 317 """ 318 ffi, lib = _get_lib() 319 320 # Validate parameters 321 if n_points <= 0: 322 msg = f"n_points must be positive, got {n_points}" 323 raise ValueError(msg) 324 if n_slices <= 0: 325 msg = f"n_slices must be positive, got {n_slices}" 326 raise ValueError(msg) 327 if probe_radius <= 0: 328 msg = f"probe_radius must be positive, got {probe_radius}" 329 raise ValueError(msg) 330 if n_threads < 0: 331 msg = f"n_threads must be non-negative, got {n_threads}" 332 raise ValueError(msg) 333 334 # Validate bitmask constraints 335 if use_bitmask: 336 use_bitmask = _validate_bitmask_params(algorithm, n_points) 337 338 # Validate and convert inputs 339 coords = np.ascontiguousarray(coords, dtype=np.float64) 340 radii = np.ascontiguousarray(radii, dtype=np.float64) 341 342 if coords.ndim != 2 or coords.shape[1] != 3: 343 msg = f"coords must be (N, 3) array, got shape {coords.shape}" 344 raise ValueError(msg) 345 346 n_atoms = coords.shape[0] 347 if radii.shape != (n_atoms,): 348 msg = f"radii must be ({n_atoms},) array, got shape {radii.shape}" 349 raise ValueError(msg) 350 351 if np.any(radii < 0): 352 msg = "All radii must be non-negative" 353 raise ValueError(msg) 354 355 # Extract x, y, z as contiguous arrays 356 x = np.ascontiguousarray(coords[:, 0]) 357 y = np.ascontiguousarray(coords[:, 1]) 358 z = np.ascontiguousarray(coords[:, 2]) 359 360 # Allocate output arrays 361 atom_areas = np.zeros(n_atoms, dtype=np.float64) 362 total_area = ffi.new("double*") 363 364 # Get cffi pointers from numpy arrays 365 x_ptr = ffi.cast("double*", x.ctypes.data) 366 y_ptr = ffi.cast("double*", y.ctypes.data) 367 z_ptr = ffi.cast("double*", z.ctypes.data) 368 radii_ptr = ffi.cast("double*", radii.ctypes.data) 369 areas_ptr = ffi.cast("double*", atom_areas.ctypes.data) 370 371 # Call the appropriate function 372 if use_bitmask: 373 result = lib.zsasa_calc_sr_bitmask( 374 x_ptr, 375 y_ptr, 376 z_ptr, 377 radii_ptr, 378 n_atoms, 379 n_points, 380 probe_radius, 381 n_threads, 382 areas_ptr, 383 total_area, 384 ) 385 elif algorithm == "sr": 386 result = lib.zsasa_calc_sr( 387 x_ptr, 388 y_ptr, 389 z_ptr, 390 radii_ptr, 391 n_atoms, 392 n_points, 393 probe_radius, 394 n_threads, 395 areas_ptr, 396 total_area, 397 ) 398 elif algorithm == "lr": 399 result = lib.zsasa_calc_lr( 400 x_ptr, 401 y_ptr, 402 z_ptr, 403 radii_ptr, 404 n_atoms, 405 n_slices, 406 probe_radius, 407 n_threads, 408 areas_ptr, 409 total_area, 410 ) 411 else: 412 msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'." 413 raise ValueError(msg) 414 415 # Check for errors 416 if result == ZSASA_ERROR_INVALID_INPUT: 417 msg = "Invalid input to SASA calculation" 418 raise ValueError(msg) 419 elif result == ZSASA_ERROR_OUT_OF_MEMORY: 420 msg = "Out of memory during SASA calculation" 421 raise MemoryError(msg) 422 elif result == ZSASA_ERROR_CALCULATION: 423 msg = "Error during SASA calculation" 424 raise RuntimeError(msg) 425 elif result == ZSASA_ERROR_UNSUPPORTED_N_POINTS: 426 msg = ( 427 f"Unsupported n_points for bitmask: {n_points}. " 428 f"Must be {_BITMASK_MIN_N_POINTS}..{_BITMASK_MAX_N_POINTS}" 429 ) 430 raise ValueError(msg) 431 elif result != ZSASA_OK: 432 msg = f"Unknown error code: {result}" 433 raise RuntimeError(msg) 434 435 return SasaResult( 436 total_area=total_area[0], 437 atom_areas=atom_areas, 438 )
Calculate Solvent Accessible Surface Area (SASA).
Args: coords: Atom coordinates as (N, 3) array. radii: Atom radii as (N,) array. algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). n_points: Number of test points per atom (for SR algorithm). Default: 100. n_slices: Number of slices per atom (for LR algorithm). Default: 20. probe_radius: Water probe radius in Angstroms. Default: 1.4. n_threads: Number of threads to use. 0 = auto-detect. Default: 0. use_bitmask: Use bitmask LUT optimization for SR algorithm. Supports n_points 1..1024. Default: False.
Returns: SasaResult containing total_area and per-atom atom_areas.
Raises: ValueError: If input arrays have invalid shapes or calculation fails.
Example:
import numpy as np from zsasa import calculate_sasa
Two atoms
coords = np.array([[0.0, 0.0, 0.0], [3.0, 0.0, 0.0]]) radii = np.array([1.5, 1.5]) result = calculate_sasa(coords, radii) print(f"Total: {result.total_area:.2f}")
879def calculate_sasa_batch( 880 coordinates: NDArray[np.floating], 881 radii: NDArray[np.floating], 882 *, 883 algorithm: Literal["sr", "lr"] = "sr", 884 n_points: int = 100, 885 n_slices: int = 20, 886 probe_radius: float = 1.4, 887 n_threads: int = 0, 888 precision: Literal["f64", "f32"] = "f64", 889 use_bitmask: bool = False, 890) -> BatchSasaResult: 891 """Calculate SASA for multiple frames (batch processing). 892 893 Optimized for MD trajectory analysis where the same atoms are processed 894 across multiple frames. Parallelizes across frames for maximum performance. 895 896 Args: 897 coordinates: Atom coordinates as (n_frames, n_atoms, 3) array in Angstroms. 898 radii: Atom radii as (n_atoms,) array in Angstroms. 899 algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). 900 n_points: Number of test points per atom (for SR algorithm). Default: 100. 901 n_slices: Number of slices per atom (for LR algorithm). Default: 20. 902 probe_radius: Water probe radius in Angstroms. Default: 1.4. 903 n_threads: Number of threads to use. 0 = auto-detect. Default: 0. 904 precision: Internal calculation precision: "f64" (default, higher precision) 905 or "f32" (matches RustSASA/mdsasa-bolt for comparison). Default: "f64". 906 use_bitmask: Use bitmask LUT optimization for SR algorithm. 907 Supports n_points 1..1024. Default: False. 908 909 Returns: 910 BatchSasaResult containing per-atom SASA for all frames. 911 912 Raises: 913 ValueError: If input arrays have invalid shapes or calculation fails. 914 915 Example: 916 >>> import numpy as np 917 >>> from zsasa import calculate_sasa_batch 918 >>> 919 >>> # 10 frames, 100 atoms 920 >>> coords = np.random.randn(10, 100, 3).astype(np.float32) 921 >>> radii = np.full(100, 1.5, dtype=np.float32) 922 >>> result = calculate_sasa_batch(coords, radii) 923 >>> print(f"Shape: {result.atom_areas.shape}") # (10, 100) 924 >>> print(f"Total SASA per frame: {result.total_areas}") 925 """ 926 ffi, lib = _get_lib() 927 928 # Validate parameters 929 if n_points <= 0: 930 msg = f"n_points must be positive, got {n_points}" 931 raise ValueError(msg) 932 if n_slices <= 0: 933 msg = f"n_slices must be positive, got {n_slices}" 934 raise ValueError(msg) 935 if probe_radius <= 0: 936 msg = f"probe_radius must be positive, got {probe_radius}" 937 raise ValueError(msg) 938 if n_threads < 0: 939 msg = f"n_threads must be non-negative, got {n_threads}" 940 raise ValueError(msg) 941 942 # Validate bitmask constraints 943 if use_bitmask: 944 use_bitmask = _validate_bitmask_params(algorithm, n_points) 945 946 # Validate and convert inputs 947 coordinates = np.ascontiguousarray(coordinates, dtype=np.float32) 948 radii = np.ascontiguousarray(radii, dtype=np.float32) 949 950 if coordinates.ndim != 3 or coordinates.shape[2] != 3: 951 msg = f"coordinates must be (n_frames, n_atoms, 3) array, got shape {coordinates.shape}" 952 raise ValueError(msg) 953 954 n_frames = coordinates.shape[0] 955 n_atoms = coordinates.shape[1] 956 957 if radii.shape != (n_atoms,): 958 msg = f"radii must be ({n_atoms},) array, got shape {radii.shape}" 959 raise ValueError(msg) 960 961 if np.any(radii < 0): 962 msg = "All radii must be non-negative" 963 raise ValueError(msg) 964 965 # Allocate output array 966 atom_areas = np.zeros((n_frames, n_atoms), dtype=np.float32) 967 968 # Get cffi pointers from numpy arrays 969 coords_ptr = ffi.cast("float*", coordinates.ctypes.data) 970 radii_ptr = ffi.cast("float*", radii.ctypes.data) 971 areas_ptr = ffi.cast("float*", atom_areas.ctypes.data) 972 973 # Call the appropriate batch function based on algorithm, precision, and bitmask 974 if use_bitmask: 975 if precision == "f32": 976 result = lib.zsasa_calc_sr_batch_bitmask_f32( 977 coords_ptr, 978 n_frames, 979 n_atoms, 980 radii_ptr, 981 n_points, 982 probe_radius, 983 n_threads, 984 areas_ptr, 985 ) 986 else: 987 result = lib.zsasa_calc_sr_batch_bitmask( 988 coords_ptr, 989 n_frames, 990 n_atoms, 991 radii_ptr, 992 n_points, 993 probe_radius, 994 n_threads, 995 areas_ptr, 996 ) 997 elif precision == "f64": 998 # Default: f32 I/O with f64 internal precision 999 if algorithm == "sr": 1000 result = lib.zsasa_calc_sr_batch( 1001 coords_ptr, 1002 n_frames, 1003 n_atoms, 1004 radii_ptr, 1005 n_points, 1006 probe_radius, 1007 n_threads, 1008 areas_ptr, 1009 ) 1010 elif algorithm == "lr": 1011 result = lib.zsasa_calc_lr_batch( 1012 coords_ptr, 1013 n_frames, 1014 n_atoms, 1015 radii_ptr, 1016 n_slices, 1017 probe_radius, 1018 n_threads, 1019 areas_ptr, 1020 ) 1021 else: 1022 msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'." 1023 raise ValueError(msg) 1024 elif precision == "f32": 1025 # Pure f32 precision (matches RustSASA/mdsasa-bolt) 1026 if algorithm == "sr": 1027 result = lib.zsasa_calc_sr_batch_f32( 1028 coords_ptr, 1029 n_frames, 1030 n_atoms, 1031 radii_ptr, 1032 n_points, 1033 probe_radius, 1034 n_threads, 1035 areas_ptr, 1036 ) 1037 elif algorithm == "lr": 1038 result = lib.zsasa_calc_lr_batch_f32( 1039 coords_ptr, 1040 n_frames, 1041 n_atoms, 1042 radii_ptr, 1043 n_slices, 1044 probe_radius, 1045 n_threads, 1046 areas_ptr, 1047 ) 1048 else: 1049 msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'." 1050 raise ValueError(msg) 1051 else: 1052 msg = f"Unknown precision: {precision}. Use 'f64' or 'f32'." 1053 raise ValueError(msg) 1054 1055 # Check for errors 1056 if result == ZSASA_ERROR_INVALID_INPUT: 1057 msg = "Invalid input to batch SASA calculation" 1058 raise ValueError(msg) 1059 elif result == ZSASA_ERROR_OUT_OF_MEMORY: 1060 msg = "Out of memory during batch SASA calculation" 1061 raise MemoryError(msg) 1062 elif result == ZSASA_ERROR_CALCULATION: 1063 msg = "Error during batch SASA calculation" 1064 raise RuntimeError(msg) 1065 elif result == ZSASA_ERROR_UNSUPPORTED_N_POINTS: 1066 msg = ( 1067 f"Unsupported n_points for bitmask: {n_points}. " 1068 f"Must be {_BITMASK_MIN_N_POINTS}..{_BITMASK_MAX_N_POINTS}" 1069 ) 1070 raise ValueError(msg) 1071 elif result != ZSASA_OK: 1072 msg = f"Unknown error code: {result}" 1073 raise RuntimeError(msg) 1074 1075 return BatchSasaResult(atom_areas=atom_areas)
Calculate SASA for multiple frames (batch processing).
Optimized for MD trajectory analysis where the same atoms are processed across multiple frames. Parallelizes across frames for maximum performance.
Args: coordinates: Atom coordinates as (n_frames, n_atoms, 3) array in Angstroms. radii: Atom radii as (n_atoms,) array in Angstroms. algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). n_points: Number of test points per atom (for SR algorithm). Default: 100. n_slices: Number of slices per atom (for LR algorithm). Default: 20. probe_radius: Water probe radius in Angstroms. Default: 1.4. n_threads: Number of threads to use. 0 = auto-detect. Default: 0. precision: Internal calculation precision: "f64" (default, higher precision) or "f32" (matches RustSASA/mdsasa-bolt for comparison). Default: "f64". use_bitmask: Use bitmask LUT optimization for SR algorithm. Supports n_points 1..1024. Default: False.
Returns: BatchSasaResult containing per-atom SASA for all frames.
Raises: ValueError: If input arrays have invalid shapes or calculation fails.
Example:
import numpy as np from zsasa import calculate_sasa_batch
10 frames, 100 atoms
coords = np.random.randn(10, 100, 3).astype(np.float32) radii = np.full(100, 1.5, dtype=np.float32) result = calculate_sasa_batch(coords, radii) print(f"Shape: {result.atom_areas.shape}") # (10, 100) print(f"Total SASA per frame: {result.total_areas}")
265@dataclass 266class SasaResult: 267 """Result of SASA calculation. 268 269 Attributes: 270 total_area: Total solvent accessible surface area in Ų. 271 atom_areas: Per-atom SASA values in Ų. 272 """ 273 274 total_area: float 275 atom_areas: NDArray[np.float64]
Result of SASA calculation.
Attributes: total_area: Total solvent accessible surface area in Ų. atom_areas: Per-atom SASA values in Ų.
849@dataclass 850class BatchSasaResult: 851 """Result of batch SASA calculation for multiple frames. 852 853 Attributes: 854 atom_areas: Per-atom SASA values for all frames, shape (n_frames, n_atoms). 855 Values are in Angstrom² (Ų). 856 """ 857 858 atom_areas: NDArray[np.float32] 859 860 @property 861 def n_frames(self) -> int: 862 """Number of frames.""" 863 return self.atom_areas.shape[0] 864 865 @property 866 def n_atoms(self) -> int: 867 """Number of atoms.""" 868 return self.atom_areas.shape[1] 869 870 @property 871 def total_areas(self) -> NDArray[np.float32]: 872 """Total SASA per frame, shape (n_frames,).""" 873 return self.atom_areas.sum(axis=1) 874 875 def __repr__(self) -> str: 876 return f"BatchSasaResult(n_frames={self.n_frames}, n_atoms={self.n_atoms})"
Result of batch SASA calculation for multiple frames.
Attributes: atom_areas: Per-atom SASA values for all frames, shape (n_frames, n_atoms). Values are in Angstrom² (Ų).
860 @property 861 def n_frames(self) -> int: 862 """Number of frames.""" 863 return self.atom_areas.shape[0]
Number of frames.
1131def process_directory( 1132 input_dir: str | Path, 1133 *, 1134 output_dir: str | Path | None = None, 1135 algorithm: Literal["sr", "lr"] = "sr", 1136 n_points: int = 100, 1137 n_slices: int = 20, 1138 probe_radius: float = 1.4, 1139 n_threads: int = 0, 1140 classifier: ClassifierType | None = ClassifierType.PROTOR, 1141 include_hydrogens: bool = False, 1142 include_hetatm: bool = False, 1143) -> BatchDirResult: 1144 """Process all supported structure files in a directory for SASA calculation. 1145 1146 Supported formats: PDB (.pdb), mmCIF (.cif, .mmcif), PDB/ENT (.ent), 1147 JSON (.json), and their gzip-compressed variants (.gz). 1148 1149 Args: 1150 input_dir: Path to directory containing structure files. 1151 output_dir: Optional path for per-file output. None = no file output. 1152 algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). 1153 n_points: Number of test points per atom (SR only; ignored for LR). 1154 Default: 100. 1155 n_slices: Number of slices per atom (LR only; ignored for SR). 1156 Default: 20. 1157 probe_radius: Water probe radius in Angstroms. Default: 1.4. 1158 n_threads: Number of threads to use. 0 = auto-detect. Default: 0. 1159 classifier: Classifier for radius assignment. None = use input radii. 1160 Default: ClassifierType.PROTOR. 1161 include_hydrogens: Whether to include hydrogen atoms. Default: False. 1162 include_hetatm: Whether to include HETATM records. Default: False. 1163 1164 Returns: 1165 BatchDirResult with per-file details. 1166 1167 Raises: 1168 ValueError: If input parameters are invalid. 1169 FileNotFoundError: If the input directory does not exist. 1170 MemoryError: If out of memory. 1171 RuntimeError: For other processing errors. 1172 1173 Example: 1174 >>> from zsasa import process_directory 1175 >>> result = process_directory("path/to/pdbs/") 1176 >>> print(f"Processed {result.successful}/{result.total_files} files") 1177 """ 1178 ffi, lib = _get_lib() 1179 1180 # Validate algorithm 1181 if algorithm == "sr": 1182 algo_int = ZSASA_ALGORITHM_SR 1183 n_points_val = n_points 1184 elif algorithm == "lr": 1185 algo_int = ZSASA_ALGORITHM_LR 1186 n_points_val = n_slices 1187 else: 1188 msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'." 1189 raise ValueError(msg) 1190 1191 if n_points <= 0: 1192 msg = f"n_points must be positive, got {n_points}" 1193 raise ValueError(msg) 1194 if n_slices <= 0: 1195 msg = f"n_slices must be positive, got {n_slices}" 1196 raise ValueError(msg) 1197 if probe_radius <= 0: 1198 msg = f"probe_radius must be positive, got {probe_radius}" 1199 raise ValueError(msg) 1200 if n_threads < 0: 1201 msg = f"n_threads must be non-negative, got {n_threads}" 1202 raise ValueError(msg) 1203 1204 # Map classifier 1205 classifier_int = int(classifier) if classifier is not None else -1 1206 1207 # Convert paths 1208 input_dir_bytes = str(input_dir).encode("utf-8") 1209 output_dir_bytes = str(output_dir).encode("utf-8") if output_dir is not None else ffi.NULL 1210 1211 error_code = ffi.new("int*") 1212 1213 handle = lib.zsasa_batch_dir_process( 1214 input_dir_bytes, 1215 output_dir_bytes, 1216 algo_int, 1217 n_points_val, 1218 probe_radius, 1219 n_threads, 1220 classifier_int, 1221 int(include_hydrogens), 1222 int(include_hetatm), 1223 error_code, 1224 ) 1225 1226 if handle == ffi.NULL: 1227 ec = error_code[0] 1228 if ec == ZSASA_ERROR_INVALID_INPUT: 1229 msg = "Invalid input parameters for directory batch processing" 1230 raise ValueError(msg) 1231 elif ec == ZSASA_ERROR_OUT_OF_MEMORY: 1232 msg = "Out of memory during directory batch processing" 1233 raise MemoryError(msg) 1234 elif ec == ZSASA_ERROR_CALCULATION: 1235 msg = "SASA calculation failed during directory batch processing" 1236 raise RuntimeError(msg) 1237 elif ec == ZSASA_ERROR_FILE_IO: 1238 msg = f"Directory not found or not readable: {input_dir}" 1239 raise FileNotFoundError(msg) 1240 else: 1241 msg = f"Directory batch processing failed with error code: {ec}" 1242 raise RuntimeError(msg) 1243 1244 try: 1245 total_files = lib.zsasa_batch_dir_get_total_files(handle) 1246 successful = lib.zsasa_batch_dir_get_successful(handle) 1247 failed = lib.zsasa_batch_dir_get_failed(handle) 1248 1249 filenames: list[str] = [] 1250 n_atoms_list: list[int] = [] 1251 total_sasa_list: list[float] = [] 1252 status_list: list[int] = [] 1253 1254 for i in range(total_files): 1255 fname_ptr = lib.zsasa_batch_dir_get_filename(handle, i) 1256 if fname_ptr == ffi.NULL: 1257 msg = ( 1258 f"Internal error: zsasa_batch_dir_get_filename returned NULL " 1259 f"for index {i} (total_files={total_files})" 1260 ) 1261 raise RuntimeError(msg) 1262 filenames.append(ffi.string(fname_ptr).decode("utf-8")) 1263 n_atoms_list.append(lib.zsasa_batch_dir_get_n_atoms(handle, i)) 1264 sasa = lib.zsasa_batch_dir_get_total_sasa(handle, i) 1265 total_sasa_list.append(float("nan") if math.isnan(sasa) else sasa) 1266 st = lib.zsasa_batch_dir_get_status(handle, i) 1267 if st not in (0, 1): 1268 msg = f"Internal error: unexpected status {st} for file index {i} (expected 0 or 1)" 1269 raise RuntimeError(msg) 1270 status_list.append(st) 1271 1272 return BatchDirResult( 1273 total_files=total_files, 1274 successful=successful, 1275 failed=failed, 1276 filenames=filenames, 1277 n_atoms=n_atoms_list, 1278 total_sasa=total_sasa_list, 1279 status=status_list, 1280 ) 1281 finally: 1282 lib.zsasa_batch_dir_free(handle)
Process all supported structure files in a directory for SASA calculation.
Supported formats: PDB (.pdb), mmCIF (.cif, .mmcif), PDB/ENT (.ent), JSON (.json), and their gzip-compressed variants (.gz).
Args: input_dir: Path to directory containing structure files. output_dir: Optional path for per-file output. None = no file output. algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). n_points: Number of test points per atom (SR only; ignored for LR). Default: 100. n_slices: Number of slices per atom (LR only; ignored for SR). Default: 20. probe_radius: Water probe radius in Angstroms. Default: 1.4. n_threads: Number of threads to use. 0 = auto-detect. Default: 0. classifier: Classifier for radius assignment. None = use input radii. Default: ClassifierType.PROTOR. include_hydrogens: Whether to include hydrogen atoms. Default: False. include_hetatm: Whether to include HETATM records. Default: False.
Returns: BatchDirResult with per-file details.
Raises: ValueError: If input parameters are invalid. FileNotFoundError: If the input directory does not exist. MemoryError: If out of memory. RuntimeError: For other processing errors.
Example:
from zsasa import process_directory result = process_directory("path/to/pdbs/") print(f"Processed {result.successful}/{result.total_files} files")
1083@dataclass 1084class BatchDirResult: 1085 """Result of directory batch processing. 1086 1087 Attributes: 1088 total_files: Number of supported structure files found in the directory. 1089 successful: Number of successfully processed files. 1090 failed: Number of files that failed processing. 1091 filenames: List of filenames (file name only, without directory path). 1092 n_atoms: Per-file atom counts. 1093 total_sasa: Per-file total SASA in Angstroms² (NaN for failed files). 1094 status: Per-file status (1=ok, 0=failed). 1095 """ 1096 1097 total_files: int 1098 successful: int 1099 failed: int 1100 filenames: list[str] 1101 n_atoms: list[int] 1102 total_sasa: list[float] 1103 status: list[int] 1104 1105 def __post_init__(self) -> None: 1106 n = len(self.filenames) 1107 if len(self.n_atoms) != n or len(self.total_sasa) != n or len(self.status) != n: 1108 msg = ( 1109 f"All per-file lists must have the same length as filenames ({n}), " 1110 f"got n_atoms={len(self.n_atoms)}, total_sasa={len(self.total_sasa)}, " 1111 f"status={len(self.status)}" 1112 ) 1113 raise ValueError(msg) 1114 if self.total_files != n: 1115 msg = f"total_files ({self.total_files}) != len(filenames) ({n})" 1116 raise ValueError(msg) 1117 if self.successful + self.failed != self.total_files: 1118 msg = ( 1119 f"successful ({self.successful}) + failed ({self.failed}) " 1120 f"!= total_files ({self.total_files})" 1121 ) 1122 raise ValueError(msg) 1123 1124 def __repr__(self) -> str: 1125 return ( 1126 f"BatchDirResult(total_files={self.total_files}, " 1127 f"successful={self.successful}, failed={self.failed})" 1128 )
Result of directory batch processing.
Attributes: total_files: Number of supported structure files found in the directory. successful: Number of successfully processed files. failed: Number of files that failed processing. filenames: List of filenames (file name only, without directory path). n_atoms: Per-file atom counts. total_sasa: Per-file total SASA in Angstroms² (NaN for failed files). status: Per-file status (1=ok, 0=failed).
446class ClassifierType(IntEnum): 447 """Available classifier types for atom radius assignment. 448 449 Attributes: 450 NACCESS: NACCESS-compatible radii (default, most commonly used). 451 PROTOR: ProtOr radii based on hybridization state. 452 OONS: Ooi, Oobatake, Nemethy, Scheraga radii (older FreeSASA default). 453 """ 454 455 NACCESS = ZSASA_CLASSIFIER_NACCESS 456 PROTOR = ZSASA_CLASSIFIER_PROTOR 457 OONS = ZSASA_CLASSIFIER_OONS
Available classifier types for atom radius assignment.
Attributes: NACCESS: NACCESS-compatible radii (default, most commonly used). PROTOR: ProtOr radii based on hybridization state. OONS: Ooi, Oobatake, Nemethy, Scheraga radii (older FreeSASA default).
460class AtomClass(IntEnum): 461 """Atom polarity classes. 462 463 Attributes: 464 POLAR: Polar atoms (e.g., N, O). 465 APOLAR: Apolar/hydrophobic atoms (e.g., C). 466 UNKNOWN: Unknown classification. 467 """ 468 469 POLAR = ZSASA_ATOM_CLASS_POLAR 470 APOLAR = ZSASA_ATOM_CLASS_APOLAR 471 UNKNOWN = ZSASA_ATOM_CLASS_UNKNOWN
Atom polarity classes.
Attributes: POLAR: Polar atoms (e.g., N, O). APOLAR: Apolar/hydrophobic atoms (e.g., C). UNKNOWN: Unknown classification.
595@dataclass 596class ClassificationResult: 597 """Result of batch atom classification. 598 599 Attributes: 600 radii: Per-atom van der Waals radii in Angstroms. 601 NaN values indicate atoms not found in the classifier. 602 Use np.isnan(result.radii) to find unknown atoms. 603 classes: Per-atom polarity classes (AtomClass constants). 604 605 Example: 606 >>> result = classify_atoms(residues, atoms) 607 >>> unknown_mask = np.isnan(result.radii) 608 >>> if unknown_mask.any(): 609 ... print(f"Found {unknown_mask.sum()} unknown atoms") 610 """ 611 612 radii: NDArray[np.float64] 613 classes: NDArray[np.int32] 614 615 def __repr__(self) -> str: 616 return f"ClassificationResult(n_atoms={len(self.radii)})"
Result of batch atom classification.
Attributes: radii: Per-atom van der Waals radii in Angstroms. NaN values indicate atoms not found in the classifier. Use np.isnan(result.radii) to find unknown atoms. classes: Per-atom polarity classes (AtomClass constants).
Example:
result = classify_atoms(residues, atoms) unknown_mask = np.isnan(result.radii) if unknown_mask.any(): ... print(f"Found {unknown_mask.sum()} unknown atoms")
479def get_radius( 480 residue: str, 481 atom: str, 482 classifier_type: ClassifierType = ClassifierType.NACCESS, 483) -> float | None: 484 """Get van der Waals radius for an atom using the specified classifier. 485 486 Args: 487 residue: Residue name (e.g., "ALA", "GLY"). 488 atom: Atom name (e.g., "CA", "CB"). 489 classifier_type: Classifier to use. Default: ClassifierType.NACCESS. 490 491 Returns: 492 Radius in Angstroms, or None if atom is not found in classifier. 493 494 Example: 495 >>> from zsasa import get_radius, ClassifierType 496 >>> get_radius("ALA", "CA") 497 1.87 498 >>> get_radius("ALA", "XX") # Unknown atom 499 None 500 """ 501 _, lib = _get_lib() 502 radius = lib.zsasa_classifier_get_radius( 503 classifier_type, 504 residue.encode("utf-8"), 505 atom.encode("utf-8"), 506 ) 507 if np.isnan(radius): 508 return None 509 return radius
Get van der Waals radius for an atom using the specified classifier.
Args: residue: Residue name (e.g., "ALA", "GLY"). atom: Atom name (e.g., "CA", "CB"). classifier_type: Classifier to use. Default: ClassifierType.NACCESS.
Returns: Radius in Angstroms, or None if atom is not found in classifier.
Example:
from zsasa import get_radius, ClassifierType get_radius("ALA", "CA") 1.87 get_radius("ALA", "XX") # Unknown atom None
512def get_atom_class( 513 residue: str, 514 atom: str, 515 classifier_type: ClassifierType = ClassifierType.NACCESS, 516) -> int: 517 """Get atom polarity class using the specified classifier. 518 519 Args: 520 residue: Residue name (e.g., "ALA", "GLY"). 521 atom: Atom name (e.g., "CA", "CB"). 522 classifier_type: Classifier to use. Default: ClassifierType.NACCESS. 523 524 Returns: 525 AtomClass constant (POLAR, APOLAR, or UNKNOWN). 526 527 Example: 528 >>> from zsasa import get_atom_class, AtomClass 529 >>> get_atom_class("ALA", "CA") == AtomClass.APOLAR 530 True 531 >>> get_atom_class("ALA", "O") == AtomClass.POLAR 532 True 533 """ 534 _, lib = _get_lib() 535 return lib.zsasa_classifier_get_class( 536 classifier_type, 537 residue.encode("utf-8"), 538 atom.encode("utf-8"), 539 )
Get atom polarity class using the specified classifier.
Args: residue: Residue name (e.g., "ALA", "GLY"). atom: Atom name (e.g., "CA", "CB"). classifier_type: Classifier to use. Default: ClassifierType.NACCESS.
Returns: AtomClass constant (POLAR, APOLAR, or UNKNOWN).
Example:
from zsasa import get_atom_class, AtomClass get_atom_class("ALA", "CA") == AtomClass.APOLAR True get_atom_class("ALA", "O") == AtomClass.POLAR True
542def guess_radius(element: str) -> float | None: 543 """Guess van der Waals radius from element symbol. 544 545 Args: 546 element: Element symbol (e.g., "C", "N", "FE"). 547 Case-insensitive, whitespace is trimmed. 548 549 Returns: 550 Radius in Angstroms, or None if element is not recognized. 551 552 Example: 553 >>> from zsasa import guess_radius 554 >>> guess_radius("C") 555 1.7 556 >>> guess_radius("FE") 557 1.26 558 >>> guess_radius("XX") # Unknown 559 None 560 """ 561 _, lib = _get_lib() 562 radius = lib.zsasa_guess_radius(element.encode("utf-8")) 563 if np.isnan(radius): 564 return None 565 return radius
Guess van der Waals radius from element symbol.
Args: element: Element symbol (e.g., "C", "N", "FE"). Case-insensitive, whitespace is trimmed.
Returns: Radius in Angstroms, or None if element is not recognized.
Example:
from zsasa import guess_radius guess_radius("C") 1.7 guess_radius("FE") 1.26 guess_radius("XX") # Unknown None
568def guess_radius_from_atom_name(atom_name: str) -> float | None: 569 """Guess van der Waals radius from PDB-style atom name. 570 571 Extracts element symbol from atom name following PDB conventions: 572 - Leading space indicates single-char element (e.g., " CA " = Carbon alpha) 573 - No leading space may indicate 2-char element (e.g., "FE " = Iron) 574 575 Args: 576 atom_name: PDB-style atom name (e.g., " CA ", "FE "). 577 578 Returns: 579 Radius in Angstroms, or None if element cannot be determined. 580 581 Example: 582 >>> from zsasa import guess_radius_from_atom_name 583 >>> guess_radius_from_atom_name(" CA ") # Carbon alpha 584 1.7 585 >>> guess_radius_from_atom_name("FE ") # Iron 586 1.26 587 """ 588 _, lib = _get_lib() 589 radius = lib.zsasa_guess_radius_from_atom_name(atom_name.encode("utf-8")) 590 if np.isnan(radius): 591 return None 592 return radius
Guess van der Waals radius from PDB-style atom name.
Extracts element symbol from atom name following PDB conventions:
- Leading space indicates single-char element (e.g., " CA " = Carbon alpha)
- No leading space may indicate 2-char element (e.g., "FE " = Iron)
Args: atom_name: PDB-style atom name (e.g., " CA ", "FE ").
Returns: Radius in Angstroms, or None if element cannot be determined.
Example:
from zsasa import guess_radius_from_atom_name guess_radius_from_atom_name(" CA ") # Carbon alpha 1.7 guess_radius_from_atom_name("FE ") # Iron 1.26
619def classify_atoms( 620 residues: list[str], 621 atoms: list[str], 622 classifier_type: ClassifierType = ClassifierType.NACCESS, 623 *, 624 include_classes: bool = True, 625) -> ClassificationResult: 626 """Classify multiple atoms at once (batch operation). 627 628 This is more efficient than calling get_radius for each atom individually. 629 630 Args: 631 residues: List of residue names. 632 atoms: List of atom names (must be same length as residues). 633 classifier_type: Classifier to use. Default: ClassifierType.NACCESS. 634 include_classes: Whether to compute atom classes. Default: True. 635 636 Returns: 637 ClassificationResult with radii and classes arrays. 638 Unknown atoms have NaN radius and UNKNOWN class. 639 640 Raises: 641 ValueError: If residues and atoms have different lengths. 642 643 Example: 644 >>> from zsasa import classify_atoms 645 >>> result = classify_atoms( 646 ... ["ALA", "ALA", "GLY"], 647 ... ["CA", "O", "N"], 648 ... ) 649 >>> result.radii 650 array([1.87, 1.4 , 1.65]) 651 """ 652 ffi, lib = _get_lib() 653 654 if len(residues) != len(atoms): 655 msg = f"residues and atoms must have same length: {len(residues)} != {len(atoms)}" 656 raise ValueError(msg) 657 658 n_atoms = len(residues) 659 if n_atoms == 0: 660 return ClassificationResult( 661 radii=np.array([], dtype=np.float64), 662 classes=np.array([], dtype=np.int32), 663 ) 664 665 # Encode strings and create cffi arrays 666 # Keep references to prevent garbage collection 667 residues_bytes = [ffi.new("char[]", r.encode("utf-8")) for r in residues] 668 atoms_bytes = [ffi.new("char[]", a.encode("utf-8")) for a in atoms] 669 670 residues_arr = ffi.new("char*[]", residues_bytes) 671 atoms_arr = ffi.new("char*[]", atoms_bytes) 672 673 # Allocate output arrays 674 radii = np.zeros(n_atoms, dtype=np.float64) 675 classes = np.zeros(n_atoms, dtype=np.int32) if include_classes else None 676 677 radii_ptr = ffi.cast("double*", radii.ctypes.data) 678 classes_ptr = ffi.cast("int*", classes.ctypes.data) if include_classes else ffi.NULL 679 680 result = lib.zsasa_classify_atoms( 681 classifier_type, 682 residues_arr, 683 atoms_arr, 684 n_atoms, 685 radii_ptr, 686 classes_ptr, 687 ) 688 689 if result == ZSASA_ERROR_INVALID_INPUT: 690 msg = f"Invalid classifier type: {classifier_type}" 691 raise ValueError(msg) 692 elif result != ZSASA_OK: 693 msg = f"Classification error: {result}" 694 raise RuntimeError(msg) 695 696 if not include_classes: 697 classes = np.full(n_atoms, AtomClass.UNKNOWN, dtype=np.int32) 698 699 return ClassificationResult(radii=radii, classes=classes)
Classify multiple atoms at once (batch operation).
This is more efficient than calling get_radius for each atom individually.
Args: residues: List of residue names. atoms: List of atom names (must be same length as residues). classifier_type: Classifier to use. Default: ClassifierType.NACCESS. include_classes: Whether to compute atom classes. Default: True.
Returns: ClassificationResult with radii and classes arrays. Unknown atoms have NaN radius and UNKNOWN class.
Raises: ValueError: If residues and atoms have different lengths.
Example:
from zsasa import classify_atoms result = classify_atoms( ... ["ALA", "ALA", "GLY"], ... ["CA", "O", "N"], ... ) result.radii array([1.87, 1.4 , 1.65])
731def get_max_sasa(residue_name: str) -> float | None: 732 """Get maximum SASA value for a standard amino acid. 733 734 Values from Tien et al. (2013) "Maximum allowed solvent accessibilities 735 of residues in proteins". 736 737 Args: 738 residue_name: 3-letter residue code (e.g., "ALA", "GLY"). 739 740 Returns: 741 Maximum SASA in Angstroms², or None if residue is not a standard amino acid. 742 743 Example: 744 >>> from zsasa import get_max_sasa 745 >>> get_max_sasa("ALA") 746 129.0 747 >>> get_max_sasa("TRP") 748 285.0 749 >>> get_max_sasa("HOH") # Water - not a standard amino acid 750 None 751 """ 752 _, lib = _get_lib() 753 max_sasa = lib.zsasa_get_max_sasa(residue_name.encode("utf-8")) 754 if np.isnan(max_sasa): 755 return None 756 return max_sasa
Get maximum SASA value for a standard amino acid.
Values from Tien et al. (2013) "Maximum allowed solvent accessibilities of residues in proteins".
Args: residue_name: 3-letter residue code (e.g., "ALA", "GLY").
Returns: Maximum SASA in Angstroms², or None if residue is not a standard amino acid.
Example:
from zsasa import get_max_sasa get_max_sasa("ALA") 129.0 get_max_sasa("TRP") 285.0 get_max_sasa("HOH") # Water - not a standard amino acid None
759def calculate_rsa(sasa: float, residue_name: str) -> float | None: 760 """Calculate RSA (Relative Solvent Accessibility) for a single residue. 761 762 RSA = SASA / MaxSASA 763 764 Args: 765 sasa: Observed SASA value in Angstroms². 766 residue_name: 3-letter residue code (e.g., "ALA", "GLY"). 767 768 Returns: 769 RSA value (typically 0.0-1.0), or None if residue is not a standard amino acid. 770 Note: RSA > 1.0 is possible for exposed terminal residues. 771 772 Example: 773 >>> from zsasa import calculate_rsa 774 >>> calculate_rsa(64.5, "ALA") # 64.5 / 129.0 = 0.5 775 0.5 776 >>> calculate_rsa(150.0, "GLY") # RSA > 1.0 is possible 777 1.4423076923076923 778 """ 779 _, lib = _get_lib() 780 rsa = lib.zsasa_calculate_rsa(sasa, residue_name.encode("utf-8")) 781 if np.isnan(rsa): 782 return None 783 return rsa
Calculate RSA (Relative Solvent Accessibility) for a single residue.
RSA = SASA / MaxSASA
Args: sasa: Observed SASA value in Angstroms². residue_name: 3-letter residue code (e.g., "ALA", "GLY").
Returns: RSA value (typically 0.0-1.0), or None if residue is not a standard amino acid. Note: RSA > 1.0 is possible for exposed terminal residues.
Example:
from zsasa import calculate_rsa calculate_rsa(64.5, "ALA") # 64.5 / 129.0 = 0.5 0.5 calculate_rsa(150.0, "GLY") # RSA > 1.0 is possible 1.4423076923076923
786def calculate_rsa_batch( 787 sasas: NDArray[np.float64] | list[float], 788 residue_names: list[str], 789) -> NDArray[np.float64]: 790 """Calculate RSA for multiple residues at once (batch operation). 791 792 This is more efficient than calling calculate_rsa for each residue individually. 793 794 Args: 795 sasas: Array of SASA values in Angstroms². 796 residue_names: List of 3-letter residue codes (must be same length as sasas). 797 798 Returns: 799 Array of RSA values. NaN values indicate non-standard amino acids. 800 801 Raises: 802 ValueError: If sasas and residue_names have different lengths. 803 804 Example: 805 >>> import numpy as np 806 >>> from zsasa import calculate_rsa_batch 807 >>> sasas = np.array([64.5, 52.0, 100.0]) 808 >>> residues = ["ALA", "GLY", "HOH"] # HOH is not standard 809 >>> rsa = calculate_rsa_batch(sasas, residues) 810 >>> rsa 811 array([0.5 , 0.5 , nan]) 812 """ 813 ffi, lib = _get_lib() 814 815 sasas = np.ascontiguousarray(sasas, dtype=np.float64) 816 n_residues = len(sasas) 817 818 if len(residue_names) != n_residues: 819 msg = f"sasas and residue_names must have same length: {n_residues} != {len(residue_names)}" 820 raise ValueError(msg) 821 822 if n_residues == 0: 823 return np.array([], dtype=np.float64) 824 825 # Encode strings and create cffi array 826 # Keep references to prevent garbage collection 827 residues_bytes = [ffi.new("char[]", r.encode("utf-8")) for r in residue_names] 828 residues_arr = ffi.new("char*[]", residues_bytes) 829 830 # Allocate output array 831 rsa_out = np.zeros(n_residues, dtype=np.float64) 832 833 sasas_ptr = ffi.cast("double*", sasas.ctypes.data) 834 rsa_ptr = ffi.cast("double*", rsa_out.ctypes.data) 835 836 result = lib.zsasa_calculate_rsa_batch(sasas_ptr, residues_arr, n_residues, rsa_ptr) 837 if result != ZSASA_OK: 838 msg = f"RSA batch calculation failed with error code: {result}" 839 raise RuntimeError(msg) 840 841 return rsa_out
Calculate RSA for multiple residues at once (batch operation).
This is more efficient than calling calculate_rsa for each residue individually.
Args: sasas: Array of SASA values in Angstroms². residue_names: List of 3-letter residue codes (must be same length as sasas).
Returns: Array of RSA values. NaN values indicate non-standard amino acids.
Raises: ValueError: If sasas and residue_names have different lengths.
Example:
import numpy as np from zsasa import calculate_rsa_batch sasas = np.array([64.5, 52.0, 100.0]) residues = ["ALA", "GLY", "HOH"] # HOH is not standard rsa = calculate_rsa_batch(sasas, residues) rsa array([0.5 , 0.5 , nan])
40@dataclass 41class ResidueResult: 42 """Per-residue SASA result. 43 44 Attributes: 45 chain_id: Chain identifier (e.g., "A", "B"). 46 residue_id: Residue sequence number. 47 residue_name: 3-letter residue name (e.g., "ALA", "GLY"). 48 total_area: Total SASA for this residue in A^2. 49 polar_area: Polar SASA (N, O atoms, etc.) in A^2. 50 apolar_area: Apolar SASA (C atoms, etc.) in A^2. 51 rsa: Relative Solvent Accessibility (0.0-1.0+), or None for 52 non-standard amino acids. 53 n_atoms: Number of atoms in this residue. 54 """ 55 56 chain_id: str 57 residue_id: int 58 residue_name: str 59 total_area: float 60 polar_area: float 61 apolar_area: float 62 rsa: float | None 63 n_atoms: int 64 65 def __repr__(self) -> str: 66 rsa_str = f"{self.rsa:.3f}" if self.rsa is not None else "None" 67 return ( 68 f"ResidueResult({self.chain_id}:{self.residue_name}{self.residue_id}, " 69 f"total={self.total_area:.1f}, rsa={rsa_str}, n_atoms={self.n_atoms})" 70 )
Per-residue SASA result.
Attributes: chain_id: Chain identifier (e.g., "A", "B"). residue_id: Residue sequence number. residue_name: 3-letter residue name (e.g., "ALA", "GLY"). total_area: Total SASA for this residue in A^2. polar_area: Polar SASA (N, O atoms, etc.) in A^2. apolar_area: Apolar SASA (C atoms, etc.) in A^2. rsa: Relative Solvent Accessibility (0.0-1.0+), or None for non-standard amino acids. n_atoms: Number of atoms in this residue.
73def aggregate_by_residue( 74 atom_areas: NDArray[np.float64], 75 chain_ids: list[str], 76 residue_ids: list[int], 77 residue_names: list[str], 78 atom_classes: NDArray[np.int32] | None = None, 79) -> list[ResidueResult]: 80 """Aggregate per-atom SASA values to per-residue. 81 82 Groups atoms by (chain_id, residue_id) and sums their SASA values. 83 Also calculates polar/apolar breakdown and RSA if atom classes are provided. 84 85 Args: 86 atom_areas: Per-atom SASA values in A^2. 87 chain_ids: Chain ID for each atom. 88 residue_ids: Residue sequence number for each atom. 89 residue_names: Residue name for each atom. 90 atom_classes: Optional per-atom polarity classes (AtomClass values). 91 If provided, polar_area and apolar_area will be calculated. 92 93 Returns: 94 List of ResidueResult objects, one per unique residue. 95 Results are ordered by appearance in the input (preserves chain/residue order). 96 97 Example: 98 >>> import numpy as np 99 >>> from zsasa.analysis import aggregate_by_residue 100 >>> 101 >>> atom_areas = np.array([10.0, 20.0, 15.0, 25.0]) 102 >>> chain_ids = ["A", "A", "A", "A"] 103 >>> residue_ids = [1, 1, 2, 2] 104 >>> residue_names = ["ALA", "ALA", "GLY", "GLY"] 105 >>> 106 >>> residues = aggregate_by_residue( 107 ... atom_areas, chain_ids, residue_ids, residue_names 108 ... ) 109 >>> len(residues) 110 2 111 >>> residues[0].total_area 112 30.0 113 """ 114 n_atoms = len(atom_areas) 115 if n_atoms == 0: 116 return [] 117 118 # Validate input lengths 119 if len(chain_ids) != n_atoms: 120 msg = f"chain_ids length ({len(chain_ids)}) != atom_areas length ({n_atoms})" 121 raise ValueError(msg) 122 if len(residue_ids) != n_atoms: 123 msg = f"residue_ids length ({len(residue_ids)}) != atom_areas length ({n_atoms})" 124 raise ValueError(msg) 125 if len(residue_names) != n_atoms: 126 msg = f"residue_names length ({len(residue_names)}) != atom_areas length ({n_atoms})" 127 raise ValueError(msg) 128 if atom_classes is not None and len(atom_classes) != n_atoms: 129 msg = f"atom_classes length ({len(atom_classes)}) != atom_areas length ({n_atoms})" 130 raise ValueError(msg) 131 132 # Group atoms by (chain_id, residue_id) 133 # Use dict to preserve insertion order (Python 3.7+) 134 residue_data: dict[tuple[str, int], dict] = {} 135 136 for i in range(n_atoms): 137 key = (chain_ids[i], residue_ids[i]) 138 139 if key not in residue_data: 140 residue_data[key] = { 141 "residue_name": residue_names[i], 142 "total_area": 0.0, 143 "polar_area": 0.0, 144 "apolar_area": 0.0, 145 "n_atoms": 0, 146 } 147 148 data = residue_data[key] 149 area = float(atom_areas[i]) 150 data["total_area"] += area 151 data["n_atoms"] += 1 152 153 if atom_classes is not None: 154 atom_class = atom_classes[i] 155 if atom_class == AtomClass.POLAR: 156 data["polar_area"] += area 157 elif atom_class == AtomClass.APOLAR: 158 data["apolar_area"] += area 159 160 # Build result list 161 results = [] 162 for (chain_id, residue_id), data in residue_data.items(): 163 residue_name = data["residue_name"] 164 total_area = data["total_area"] 165 166 # Calculate RSA if this is a standard amino acid 167 max_sasa = MAX_SASA.get(residue_name) 168 rsa = total_area / max_sasa if max_sasa is not None else None 169 170 results.append( 171 ResidueResult( 172 chain_id=chain_id, 173 residue_id=residue_id, 174 residue_name=residue_name, 175 total_area=total_area, 176 polar_area=data["polar_area"], 177 apolar_area=data["apolar_area"], 178 rsa=rsa, 179 n_atoms=data["n_atoms"], 180 ) 181 ) 182 183 return results
Aggregate per-atom SASA values to per-residue.
Groups atoms by (chain_id, residue_id) and sums their SASA values. Also calculates polar/apolar breakdown and RSA if atom classes are provided.
Args: atom_areas: Per-atom SASA values in A^2. chain_ids: Chain ID for each atom. residue_ids: Residue sequence number for each atom. residue_names: Residue name for each atom. atom_classes: Optional per-atom polarity classes (AtomClass values). If provided, polar_area and apolar_area will be calculated.
Returns: List of ResidueResult objects, one per unique residue. Results are ordered by appearance in the input (preserves chain/residue order).
Example:
import numpy as np from zsasa.analysis import aggregate_by_residue
atom_areas = np.array([10.0, 20.0, 15.0, 25.0]) chain_ids = ["A", "A", "A", "A"] residue_ids = [1, 1, 2, 2] residue_names = ["ALA", "ALA", "GLY", "GLY"]
residues = aggregate_by_residue( ... atom_areas, chain_ids, residue_ids, residue_names ... ) len(residues) 2 residues[0].total_area 30.0
186def aggregate_from_result(result: SasaResultWithAtoms) -> list[ResidueResult]: 187 """Aggregate per-atom SASA from a SasaResultWithAtoms to per-residue. 188 189 This is a convenience wrapper that extracts the necessary data from 190 a SasaResultWithAtoms object returned by gemmi integration functions. 191 192 Args: 193 result: A SasaResultWithAtoms object from calculate_sasa_from_structure 194 or calculate_sasa_from_model. 195 196 Returns: 197 List of ResidueResult objects, one per unique residue. 198 199 Example: 200 >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure 201 >>> from zsasa.analysis import aggregate_from_result 202 >>> 203 >>> result = calculate_sasa_from_structure("protein.cif") 204 >>> residues = aggregate_from_result(result) 205 >>> 206 >>> # Print buried residues (RSA < 0.25) 207 >>> for res in residues: 208 ... if res.rsa is not None and res.rsa < 0.25: 209 ... print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: {res.rsa:.1%}") 210 """ 211 return aggregate_by_residue( 212 atom_areas=result.atom_areas, 213 chain_ids=result.atom_data.chain_ids, 214 residue_ids=result.atom_data.residue_ids, 215 residue_names=result.atom_data.residue_names, 216 atom_classes=result.atom_classes, 217 )
Aggregate per-atom SASA from a SasaResultWithAtoms to per-residue.
This is a convenience wrapper that extracts the necessary data from a SasaResultWithAtoms object returned by gemmi integration functions.
Args: result: A SasaResultWithAtoms object from calculate_sasa_from_structure or calculate_sasa_from_model.
Returns: List of ResidueResult objects, one per unique residue.
Example:
from zsasa.integrations.gemmi import calculate_sasa_from_structure from zsasa.analysis import aggregate_from_result
result = calculate_sasa_from_structure("protein.cif") residues = aggregate_from_result(result)
Print buried residues (RSA < 0.25)
for res in residues: ... if res.rsa is not None and res.rsa < 0.25: ... print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: {res.rsa:.1%}")
259def get_version() -> str: 260 """Get the library version string.""" 261 ffi, lib = _get_lib() 262 return ffi.string(lib.zsasa_version()).decode("utf-8")
Get the library version string.