zsasa

zsasa: Fast SASA calculation using Zig.

This package provides Python bindings for the zsasa library, a high-performance implementation of Solvent Accessible Surface Area (SASA) calculation algorithms.

Example:

import numpy as np from zsasa import calculate_sasa

Single atom

coords = np.array([[0.0, 0.0, 0.0]]) radii = np.array([1.5]) result = calculate_sasa(coords, radii) print(f"Total SASA: {result.total_area:.2f} Ų")

>>> # Classify atoms
>>> from zsasa import classify_atoms, get_radius
>>> result = classify_atoms(["ALA", "ALA"], ["CA", "O"])
>>> print(result.radii)  # [1.87, 1.4]

Integrations: For structure file support, use the gemmi integration:

>>> # pip install zsasa[gemmi]
>>> from zsasa.integrations.gemmi import calculate_sasa_from_structure
>>> result = calculate_sasa_from_structure("protein.cif")
>>> print(f"Total: {result.total_area:.1f} Ų")

Analysis: For per-residue aggregation and RSA calculation:

>>> from zsasa import aggregate_from_result
>>> from zsasa.integrations.gemmi import calculate_sasa_from_structure
>>> result = calculate_sasa_from_structure("protein.cif")
>>> residues = aggregate_from_result(result)
>>> for res in residues:
...     if res.rsa is not None:
...         print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: RSA={res.rsa:.1%}")

MDTraj Integration: For MD trajectory analysis (requires mdtraj):

>>> # pip install mdtraj
>>> from zsasa.mdtraj import compute_sasa
>>> import mdtraj as md
>>> traj = md.load('trajectory.xtc', top='topology.pdb')
>>> sasa = compute_sasa(traj)  # Returns (n_frames, n_atoms) in nm²

MDAnalysis Integration: For MD trajectory analysis with MDAnalysis (requires MDAnalysis):

>>> # pip install MDAnalysis
>>> import MDAnalysis as mda
>>> from zsasa.mdanalysis import SASAAnalysis
>>> u = mda.Universe('topology.pdb', 'trajectory.xtc')
>>> sasa = SASAAnalysis(u, select='protein')
>>> sasa.run()
>>> print(sasa.results.total_area)  # Returns per-frame SASA in Ų

Native XTC Reader: For standalone XTC reading without MDTraj/MDAnalysis dependencies:

>>> from zsasa.xtc import XtcReader, compute_sasa_trajectory
>>> # Low-level reader API
>>> with XtcReader("trajectory.xtc") as reader:
...     for frame in reader:
...         print(f"Step {frame.step}")
>>>
>>> # High-level SASA calculation (radii from topology)
>>> result = compute_sasa_trajectory("trajectory.xtc", radii)
>>> print(result.total_areas)  # Per-frame SASA in Ų

Native DCD Reader: For standalone DCD reading (NAMD/CHARMM format):

>>> from zsasa.dcd import DcdReader, compute_sasa_trajectory
>>> with DcdReader("trajectory.dcd") as reader:
...     for frame in reader:
...         print(f"Step {frame.step}, {frame.natoms} atoms")
>>>
>>> result = compute_sasa_trajectory("trajectory.dcd", radii)
>>> print(result.total_areas)  # Per-frame SASA in Ų
  1"""zsasa: Fast SASA calculation using Zig.
  2
  3This package provides Python bindings for the zsasa library,
  4a high-performance implementation of Solvent Accessible Surface Area (SASA)
  5calculation algorithms.
  6
  7Example:
  8    >>> import numpy as np
  9    >>> from zsasa import calculate_sasa
 10    >>>
 11    >>> # Single atom
 12    >>> coords = np.array([[0.0, 0.0, 0.0]])
 13    >>> radii = np.array([1.5])
 14    >>> result = calculate_sasa(coords, radii)
 15    >>> print(f"Total SASA: {result.total_area:.2f} Ų")
 16
 17    >>> # Classify atoms
 18    >>> from zsasa import classify_atoms, get_radius
 19    >>> result = classify_atoms(["ALA", "ALA"], ["CA", "O"])
 20    >>> print(result.radii)  # [1.87, 1.4]
 21
 22Integrations:
 23    For structure file support, use the gemmi integration:
 24
 25    >>> # pip install zsasa[gemmi]
 26    >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure
 27    >>> result = calculate_sasa_from_structure("protein.cif")
 28    >>> print(f"Total: {result.total_area:.1f} Ų")
 29
 30Analysis:
 31    For per-residue aggregation and RSA calculation:
 32
 33    >>> from zsasa import aggregate_from_result
 34    >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure
 35    >>> result = calculate_sasa_from_structure("protein.cif")
 36    >>> residues = aggregate_from_result(result)
 37    >>> for res in residues:
 38    ...     if res.rsa is not None:
 39    ...         print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: RSA={res.rsa:.1%}")
 40
 41MDTraj Integration:
 42    For MD trajectory analysis (requires mdtraj):
 43
 44    >>> # pip install mdtraj
 45    >>> from zsasa.mdtraj import compute_sasa
 46    >>> import mdtraj as md
 47    >>> traj = md.load('trajectory.xtc', top='topology.pdb')
 48    >>> sasa = compute_sasa(traj)  # Returns (n_frames, n_atoms) in nm²
 49
 50MDAnalysis Integration:
 51    For MD trajectory analysis with MDAnalysis (requires MDAnalysis):
 52
 53    >>> # pip install MDAnalysis
 54    >>> import MDAnalysis as mda
 55    >>> from zsasa.mdanalysis import SASAAnalysis
 56    >>> u = mda.Universe('topology.pdb', 'trajectory.xtc')
 57    >>> sasa = SASAAnalysis(u, select='protein')
 58    >>> sasa.run()
 59    >>> print(sasa.results.total_area)  # Returns per-frame SASA in Ų
 60
 61Native XTC Reader:
 62    For standalone XTC reading without MDTraj/MDAnalysis dependencies:
 63
 64    >>> from zsasa.xtc import XtcReader, compute_sasa_trajectory
 65    >>> # Low-level reader API
 66    >>> with XtcReader("trajectory.xtc") as reader:
 67    ...     for frame in reader:
 68    ...         print(f"Step {frame.step}")
 69    >>>
 70    >>> # High-level SASA calculation (radii from topology)
 71    >>> result = compute_sasa_trajectory("trajectory.xtc", radii)
 72    >>> print(result.total_areas)  # Per-frame SASA in Ų
 73
 74Native DCD Reader:
 75    For standalone DCD reading (NAMD/CHARMM format):
 76
 77    >>> from zsasa.dcd import DcdReader, compute_sasa_trajectory
 78    >>> with DcdReader("trajectory.dcd") as reader:
 79    ...     for frame in reader:
 80    ...         print(f"Step {frame.step}, {frame.natoms} atoms")
 81    >>>
 82    >>> result = compute_sasa_trajectory("trajectory.dcd", radii)
 83    >>> print(result.total_areas)  # Per-frame SASA in Ų
 84"""
 85
 86from zsasa.analysis import (
 87    ResidueResult,
 88    aggregate_by_residue,
 89    aggregate_from_result,
 90)
 91from zsasa.core import (
 92    MAX_SASA,
 93    AtomClass,
 94    BatchDirResult,
 95    BatchSasaResult,
 96    ClassificationResult,
 97    ClassifierType,
 98    SasaResult,
 99    calculate_rsa,
100    calculate_rsa_batch,
101    calculate_sasa,
102    calculate_sasa_batch,
103    classify_atoms,
104    get_atom_class,
105    get_max_sasa,
106    get_radius,
107    get_version,
108    guess_radius,
109    guess_radius_from_atom_name,
110    process_directory,
111)
112
113__all__ = [
114    # SASA calculation
115    "calculate_sasa",
116    "calculate_sasa_batch",
117    "SasaResult",
118    "BatchSasaResult",
119    # Batch directory processing
120    "process_directory",
121    "BatchDirResult",
122    # Classifier
123    "ClassifierType",
124    "AtomClass",
125    "ClassificationResult",
126    "get_radius",
127    "get_atom_class",
128    "guess_radius",
129    "guess_radius_from_atom_name",
130    "classify_atoms",
131    # RSA
132    "MAX_SASA",
133    "get_max_sasa",
134    "calculate_rsa",
135    "calculate_rsa_batch",
136    # Analysis
137    "ResidueResult",
138    "aggregate_by_residue",
139    "aggregate_from_result",
140    # Utility
141    "get_version",
142]
143
144__version__ = get_version()
def calculate_sasa( coords: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]], radii: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]], *, algorithm: Literal['sr', 'lr'] = 'sr', n_points: int = 100, n_slices: int = 20, probe_radius: float = 1.4, n_threads: int = 0, use_bitmask: bool = False) -> SasaResult:
278def calculate_sasa(
279    coords: NDArray[np.float64],
280    radii: NDArray[np.float64],
281    *,
282    algorithm: Literal["sr", "lr"] = "sr",
283    n_points: int = 100,
284    n_slices: int = 20,
285    probe_radius: float = 1.4,
286    n_threads: int = 0,
287    use_bitmask: bool = False,
288) -> SasaResult:
289    """Calculate Solvent Accessible Surface Area (SASA).
290
291    Args:
292        coords: Atom coordinates as (N, 3) array.
293        radii: Atom radii as (N,) array.
294        algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards).
295        n_points: Number of test points per atom (for SR algorithm). Default: 100.
296        n_slices: Number of slices per atom (for LR algorithm). Default: 20.
297        probe_radius: Water probe radius in Angstroms. Default: 1.4.
298        n_threads: Number of threads to use. 0 = auto-detect. Default: 0.
299        use_bitmask: Use bitmask LUT optimization for SR algorithm.
300            Supports n_points 1..1024. Default: False.
301
302    Returns:
303        SasaResult containing total_area and per-atom atom_areas.
304
305    Raises:
306        ValueError: If input arrays have invalid shapes or calculation fails.
307
308    Example:
309        >>> import numpy as np
310        >>> from zsasa import calculate_sasa
311        >>>
312        >>> # Two atoms
313        >>> coords = np.array([[0.0, 0.0, 0.0], [3.0, 0.0, 0.0]])
314        >>> radii = np.array([1.5, 1.5])
315        >>> result = calculate_sasa(coords, radii)
316        >>> print(f"Total: {result.total_area:.2f}")
317    """
318    ffi, lib = _get_lib()
319
320    # Validate parameters
321    if n_points <= 0:
322        msg = f"n_points must be positive, got {n_points}"
323        raise ValueError(msg)
324    if n_slices <= 0:
325        msg = f"n_slices must be positive, got {n_slices}"
326        raise ValueError(msg)
327    if probe_radius <= 0:
328        msg = f"probe_radius must be positive, got {probe_radius}"
329        raise ValueError(msg)
330    if n_threads < 0:
331        msg = f"n_threads must be non-negative, got {n_threads}"
332        raise ValueError(msg)
333
334    # Validate bitmask constraints
335    if use_bitmask:
336        use_bitmask = _validate_bitmask_params(algorithm, n_points)
337
338    # Validate and convert inputs
339    coords = np.ascontiguousarray(coords, dtype=np.float64)
340    radii = np.ascontiguousarray(radii, dtype=np.float64)
341
342    if coords.ndim != 2 or coords.shape[1] != 3:
343        msg = f"coords must be (N, 3) array, got shape {coords.shape}"
344        raise ValueError(msg)
345
346    n_atoms = coords.shape[0]
347    if radii.shape != (n_atoms,):
348        msg = f"radii must be ({n_atoms},) array, got shape {radii.shape}"
349        raise ValueError(msg)
350
351    if np.any(radii < 0):
352        msg = "All radii must be non-negative"
353        raise ValueError(msg)
354
355    # Extract x, y, z as contiguous arrays
356    x = np.ascontiguousarray(coords[:, 0])
357    y = np.ascontiguousarray(coords[:, 1])
358    z = np.ascontiguousarray(coords[:, 2])
359
360    # Allocate output arrays
361    atom_areas = np.zeros(n_atoms, dtype=np.float64)
362    total_area = ffi.new("double*")
363
364    # Get cffi pointers from numpy arrays
365    x_ptr = ffi.cast("double*", x.ctypes.data)
366    y_ptr = ffi.cast("double*", y.ctypes.data)
367    z_ptr = ffi.cast("double*", z.ctypes.data)
368    radii_ptr = ffi.cast("double*", radii.ctypes.data)
369    areas_ptr = ffi.cast("double*", atom_areas.ctypes.data)
370
371    # Call the appropriate function
372    if use_bitmask:
373        result = lib.zsasa_calc_sr_bitmask(
374            x_ptr,
375            y_ptr,
376            z_ptr,
377            radii_ptr,
378            n_atoms,
379            n_points,
380            probe_radius,
381            n_threads,
382            areas_ptr,
383            total_area,
384        )
385    elif algorithm == "sr":
386        result = lib.zsasa_calc_sr(
387            x_ptr,
388            y_ptr,
389            z_ptr,
390            radii_ptr,
391            n_atoms,
392            n_points,
393            probe_radius,
394            n_threads,
395            areas_ptr,
396            total_area,
397        )
398    elif algorithm == "lr":
399        result = lib.zsasa_calc_lr(
400            x_ptr,
401            y_ptr,
402            z_ptr,
403            radii_ptr,
404            n_atoms,
405            n_slices,
406            probe_radius,
407            n_threads,
408            areas_ptr,
409            total_area,
410        )
411    else:
412        msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'."
413        raise ValueError(msg)
414
415    # Check for errors
416    if result == ZSASA_ERROR_INVALID_INPUT:
417        msg = "Invalid input to SASA calculation"
418        raise ValueError(msg)
419    elif result == ZSASA_ERROR_OUT_OF_MEMORY:
420        msg = "Out of memory during SASA calculation"
421        raise MemoryError(msg)
422    elif result == ZSASA_ERROR_CALCULATION:
423        msg = "Error during SASA calculation"
424        raise RuntimeError(msg)
425    elif result == ZSASA_ERROR_UNSUPPORTED_N_POINTS:
426        msg = (
427            f"Unsupported n_points for bitmask: {n_points}. "
428            f"Must be {_BITMASK_MIN_N_POINTS}..{_BITMASK_MAX_N_POINTS}"
429        )
430        raise ValueError(msg)
431    elif result != ZSASA_OK:
432        msg = f"Unknown error code: {result}"
433        raise RuntimeError(msg)
434
435    return SasaResult(
436        total_area=total_area[0],
437        atom_areas=atom_areas,
438    )

Calculate Solvent Accessible Surface Area (SASA).

Args: coords: Atom coordinates as (N, 3) array. radii: Atom radii as (N,) array. algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). n_points: Number of test points per atom (for SR algorithm). Default: 100. n_slices: Number of slices per atom (for LR algorithm). Default: 20. probe_radius: Water probe radius in Angstroms. Default: 1.4. n_threads: Number of threads to use. 0 = auto-detect. Default: 0. use_bitmask: Use bitmask LUT optimization for SR algorithm. Supports n_points 1..1024. Default: False.

Returns: SasaResult containing total_area and per-atom atom_areas.

Raises: ValueError: If input arrays have invalid shapes or calculation fails.

Example:

import numpy as np from zsasa import calculate_sasa

Two atoms

coords = np.array([[0.0, 0.0, 0.0], [3.0, 0.0, 0.0]]) radii = np.array([1.5, 1.5]) result = calculate_sasa(coords, radii) print(f"Total: {result.total_area:.2f}")

def calculate_sasa_batch( coordinates: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.floating]], radii: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.floating]], *, algorithm: Literal['sr', 'lr'] = 'sr', n_points: int = 100, n_slices: int = 20, probe_radius: float = 1.4, n_threads: int = 0, precision: Literal['f64', 'f32'] = 'f64', use_bitmask: bool = False) -> BatchSasaResult:
 879def calculate_sasa_batch(
 880    coordinates: NDArray[np.floating],
 881    radii: NDArray[np.floating],
 882    *,
 883    algorithm: Literal["sr", "lr"] = "sr",
 884    n_points: int = 100,
 885    n_slices: int = 20,
 886    probe_radius: float = 1.4,
 887    n_threads: int = 0,
 888    precision: Literal["f64", "f32"] = "f64",
 889    use_bitmask: bool = False,
 890) -> BatchSasaResult:
 891    """Calculate SASA for multiple frames (batch processing).
 892
 893    Optimized for MD trajectory analysis where the same atoms are processed
 894    across multiple frames. Parallelizes across frames for maximum performance.
 895
 896    Args:
 897        coordinates: Atom coordinates as (n_frames, n_atoms, 3) array in Angstroms.
 898        radii: Atom radii as (n_atoms,) array in Angstroms.
 899        algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards).
 900        n_points: Number of test points per atom (for SR algorithm). Default: 100.
 901        n_slices: Number of slices per atom (for LR algorithm). Default: 20.
 902        probe_radius: Water probe radius in Angstroms. Default: 1.4.
 903        n_threads: Number of threads to use. 0 = auto-detect. Default: 0.
 904        precision: Internal calculation precision: "f64" (default, higher precision)
 905            or "f32" (matches RustSASA/mdsasa-bolt for comparison). Default: "f64".
 906        use_bitmask: Use bitmask LUT optimization for SR algorithm.
 907            Supports n_points 1..1024. Default: False.
 908
 909    Returns:
 910        BatchSasaResult containing per-atom SASA for all frames.
 911
 912    Raises:
 913        ValueError: If input arrays have invalid shapes or calculation fails.
 914
 915    Example:
 916        >>> import numpy as np
 917        >>> from zsasa import calculate_sasa_batch
 918        >>>
 919        >>> # 10 frames, 100 atoms
 920        >>> coords = np.random.randn(10, 100, 3).astype(np.float32)
 921        >>> radii = np.full(100, 1.5, dtype=np.float32)
 922        >>> result = calculate_sasa_batch(coords, radii)
 923        >>> print(f"Shape: {result.atom_areas.shape}")  # (10, 100)
 924        >>> print(f"Total SASA per frame: {result.total_areas}")
 925    """
 926    ffi, lib = _get_lib()
 927
 928    # Validate parameters
 929    if n_points <= 0:
 930        msg = f"n_points must be positive, got {n_points}"
 931        raise ValueError(msg)
 932    if n_slices <= 0:
 933        msg = f"n_slices must be positive, got {n_slices}"
 934        raise ValueError(msg)
 935    if probe_radius <= 0:
 936        msg = f"probe_radius must be positive, got {probe_radius}"
 937        raise ValueError(msg)
 938    if n_threads < 0:
 939        msg = f"n_threads must be non-negative, got {n_threads}"
 940        raise ValueError(msg)
 941
 942    # Validate bitmask constraints
 943    if use_bitmask:
 944        use_bitmask = _validate_bitmask_params(algorithm, n_points)
 945
 946    # Validate and convert inputs
 947    coordinates = np.ascontiguousarray(coordinates, dtype=np.float32)
 948    radii = np.ascontiguousarray(radii, dtype=np.float32)
 949
 950    if coordinates.ndim != 3 or coordinates.shape[2] != 3:
 951        msg = f"coordinates must be (n_frames, n_atoms, 3) array, got shape {coordinates.shape}"
 952        raise ValueError(msg)
 953
 954    n_frames = coordinates.shape[0]
 955    n_atoms = coordinates.shape[1]
 956
 957    if radii.shape != (n_atoms,):
 958        msg = f"radii must be ({n_atoms},) array, got shape {radii.shape}"
 959        raise ValueError(msg)
 960
 961    if np.any(radii < 0):
 962        msg = "All radii must be non-negative"
 963        raise ValueError(msg)
 964
 965    # Allocate output array
 966    atom_areas = np.zeros((n_frames, n_atoms), dtype=np.float32)
 967
 968    # Get cffi pointers from numpy arrays
 969    coords_ptr = ffi.cast("float*", coordinates.ctypes.data)
 970    radii_ptr = ffi.cast("float*", radii.ctypes.data)
 971    areas_ptr = ffi.cast("float*", atom_areas.ctypes.data)
 972
 973    # Call the appropriate batch function based on algorithm, precision, and bitmask
 974    if use_bitmask:
 975        if precision == "f32":
 976            result = lib.zsasa_calc_sr_batch_bitmask_f32(
 977                coords_ptr,
 978                n_frames,
 979                n_atoms,
 980                radii_ptr,
 981                n_points,
 982                probe_radius,
 983                n_threads,
 984                areas_ptr,
 985            )
 986        else:
 987            result = lib.zsasa_calc_sr_batch_bitmask(
 988                coords_ptr,
 989                n_frames,
 990                n_atoms,
 991                radii_ptr,
 992                n_points,
 993                probe_radius,
 994                n_threads,
 995                areas_ptr,
 996            )
 997    elif precision == "f64":
 998        # Default: f32 I/O with f64 internal precision
 999        if algorithm == "sr":
1000            result = lib.zsasa_calc_sr_batch(
1001                coords_ptr,
1002                n_frames,
1003                n_atoms,
1004                radii_ptr,
1005                n_points,
1006                probe_radius,
1007                n_threads,
1008                areas_ptr,
1009            )
1010        elif algorithm == "lr":
1011            result = lib.zsasa_calc_lr_batch(
1012                coords_ptr,
1013                n_frames,
1014                n_atoms,
1015                radii_ptr,
1016                n_slices,
1017                probe_radius,
1018                n_threads,
1019                areas_ptr,
1020            )
1021        else:
1022            msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'."
1023            raise ValueError(msg)
1024    elif precision == "f32":
1025        # Pure f32 precision (matches RustSASA/mdsasa-bolt)
1026        if algorithm == "sr":
1027            result = lib.zsasa_calc_sr_batch_f32(
1028                coords_ptr,
1029                n_frames,
1030                n_atoms,
1031                radii_ptr,
1032                n_points,
1033                probe_radius,
1034                n_threads,
1035                areas_ptr,
1036            )
1037        elif algorithm == "lr":
1038            result = lib.zsasa_calc_lr_batch_f32(
1039                coords_ptr,
1040                n_frames,
1041                n_atoms,
1042                radii_ptr,
1043                n_slices,
1044                probe_radius,
1045                n_threads,
1046                areas_ptr,
1047            )
1048        else:
1049            msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'."
1050            raise ValueError(msg)
1051    else:
1052        msg = f"Unknown precision: {precision}. Use 'f64' or 'f32'."
1053        raise ValueError(msg)
1054
1055    # Check for errors
1056    if result == ZSASA_ERROR_INVALID_INPUT:
1057        msg = "Invalid input to batch SASA calculation"
1058        raise ValueError(msg)
1059    elif result == ZSASA_ERROR_OUT_OF_MEMORY:
1060        msg = "Out of memory during batch SASA calculation"
1061        raise MemoryError(msg)
1062    elif result == ZSASA_ERROR_CALCULATION:
1063        msg = "Error during batch SASA calculation"
1064        raise RuntimeError(msg)
1065    elif result == ZSASA_ERROR_UNSUPPORTED_N_POINTS:
1066        msg = (
1067            f"Unsupported n_points for bitmask: {n_points}. "
1068            f"Must be {_BITMASK_MIN_N_POINTS}..{_BITMASK_MAX_N_POINTS}"
1069        )
1070        raise ValueError(msg)
1071    elif result != ZSASA_OK:
1072        msg = f"Unknown error code: {result}"
1073        raise RuntimeError(msg)
1074
1075    return BatchSasaResult(atom_areas=atom_areas)

Calculate SASA for multiple frames (batch processing).

Optimized for MD trajectory analysis where the same atoms are processed across multiple frames. Parallelizes across frames for maximum performance.

Args: coordinates: Atom coordinates as (n_frames, n_atoms, 3) array in Angstroms. radii: Atom radii as (n_atoms,) array in Angstroms. algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). n_points: Number of test points per atom (for SR algorithm). Default: 100. n_slices: Number of slices per atom (for LR algorithm). Default: 20. probe_radius: Water probe radius in Angstroms. Default: 1.4. n_threads: Number of threads to use. 0 = auto-detect. Default: 0. precision: Internal calculation precision: "f64" (default, higher precision) or "f32" (matches RustSASA/mdsasa-bolt for comparison). Default: "f64". use_bitmask: Use bitmask LUT optimization for SR algorithm. Supports n_points 1..1024. Default: False.

Returns: BatchSasaResult containing per-atom SASA for all frames.

Raises: ValueError: If input arrays have invalid shapes or calculation fails.

Example:

import numpy as np from zsasa import calculate_sasa_batch

10 frames, 100 atoms

coords = np.random.randn(10, 100, 3).astype(np.float32) radii = np.full(100, 1.5, dtype=np.float32) result = calculate_sasa_batch(coords, radii) print(f"Shape: {result.atom_areas.shape}") # (10, 100) print(f"Total SASA per frame: {result.total_areas}")

@dataclass
class SasaResult:
265@dataclass
266class SasaResult:
267    """Result of SASA calculation.
268
269    Attributes:
270        total_area: Total solvent accessible surface area in Ų.
271        atom_areas: Per-atom SASA values in Ų.
272    """
273
274    total_area: float
275    atom_areas: NDArray[np.float64]

Result of SASA calculation.

Attributes: total_area: Total solvent accessible surface area in Ų. atom_areas: Per-atom SASA values in Ų.

SasaResult( total_area: float, atom_areas: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]])
total_area: float
atom_areas: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]]
@dataclass
class BatchSasaResult:
849@dataclass
850class BatchSasaResult:
851    """Result of batch SASA calculation for multiple frames.
852
853    Attributes:
854        atom_areas: Per-atom SASA values for all frames, shape (n_frames, n_atoms).
855                    Values are in Angstrom² (Ų).
856    """
857
858    atom_areas: NDArray[np.float32]
859
860    @property
861    def n_frames(self) -> int:
862        """Number of frames."""
863        return self.atom_areas.shape[0]
864
865    @property
866    def n_atoms(self) -> int:
867        """Number of atoms."""
868        return self.atom_areas.shape[1]
869
870    @property
871    def total_areas(self) -> NDArray[np.float32]:
872        """Total SASA per frame, shape (n_frames,)."""
873        return self.atom_areas.sum(axis=1)
874
875    def __repr__(self) -> str:
876        return f"BatchSasaResult(n_frames={self.n_frames}, n_atoms={self.n_atoms})"

Result of batch SASA calculation for multiple frames.

Attributes: atom_areas: Per-atom SASA values for all frames, shape (n_frames, n_atoms). Values are in Angstrom² (Ų).

BatchSasaResult( atom_areas: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float32]])
atom_areas: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float32]]
n_frames: int
860    @property
861    def n_frames(self) -> int:
862        """Number of frames."""
863        return self.atom_areas.shape[0]

Number of frames.

n_atoms: int
865    @property
866    def n_atoms(self) -> int:
867        """Number of atoms."""
868        return self.atom_areas.shape[1]

Number of atoms.

total_areas: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float32]]
870    @property
871    def total_areas(self) -> NDArray[np.float32]:
872        """Total SASA per frame, shape (n_frames,)."""
873        return self.atom_areas.sum(axis=1)

Total SASA per frame, shape (n_frames,).

def process_directory( input_dir: str | pathlib._local.Path, *, output_dir: str | pathlib._local.Path | None = None, algorithm: Literal['sr', 'lr'] = 'sr', n_points: int = 100, n_slices: int = 20, probe_radius: float = 1.4, n_threads: int = 0, classifier: ClassifierType | None = <ClassifierType.PROTOR: 1>, include_hydrogens: bool = False, include_hetatm: bool = False) -> BatchDirResult:
1131def process_directory(
1132    input_dir: str | Path,
1133    *,
1134    output_dir: str | Path | None = None,
1135    algorithm: Literal["sr", "lr"] = "sr",
1136    n_points: int = 100,
1137    n_slices: int = 20,
1138    probe_radius: float = 1.4,
1139    n_threads: int = 0,
1140    classifier: ClassifierType | None = ClassifierType.PROTOR,
1141    include_hydrogens: bool = False,
1142    include_hetatm: bool = False,
1143) -> BatchDirResult:
1144    """Process all supported structure files in a directory for SASA calculation.
1145
1146    Supported formats: PDB (.pdb), mmCIF (.cif, .mmcif), PDB/ENT (.ent),
1147    JSON (.json), and their gzip-compressed variants (.gz).
1148
1149    Args:
1150        input_dir: Path to directory containing structure files.
1151        output_dir: Optional path for per-file output. None = no file output.
1152        algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards).
1153        n_points: Number of test points per atom (SR only; ignored for LR).
1154            Default: 100.
1155        n_slices: Number of slices per atom (LR only; ignored for SR).
1156            Default: 20.
1157        probe_radius: Water probe radius in Angstroms. Default: 1.4.
1158        n_threads: Number of threads to use. 0 = auto-detect. Default: 0.
1159        classifier: Classifier for radius assignment. None = use input radii.
1160            Default: ClassifierType.PROTOR.
1161        include_hydrogens: Whether to include hydrogen atoms. Default: False.
1162        include_hetatm: Whether to include HETATM records. Default: False.
1163
1164    Returns:
1165        BatchDirResult with per-file details.
1166
1167    Raises:
1168        ValueError: If input parameters are invalid.
1169        FileNotFoundError: If the input directory does not exist.
1170        MemoryError: If out of memory.
1171        RuntimeError: For other processing errors.
1172
1173    Example:
1174        >>> from zsasa import process_directory
1175        >>> result = process_directory("path/to/pdbs/")
1176        >>> print(f"Processed {result.successful}/{result.total_files} files")
1177    """
1178    ffi, lib = _get_lib()
1179
1180    # Validate algorithm
1181    if algorithm == "sr":
1182        algo_int = ZSASA_ALGORITHM_SR
1183        n_points_val = n_points
1184    elif algorithm == "lr":
1185        algo_int = ZSASA_ALGORITHM_LR
1186        n_points_val = n_slices
1187    else:
1188        msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'."
1189        raise ValueError(msg)
1190
1191    if n_points <= 0:
1192        msg = f"n_points must be positive, got {n_points}"
1193        raise ValueError(msg)
1194    if n_slices <= 0:
1195        msg = f"n_slices must be positive, got {n_slices}"
1196        raise ValueError(msg)
1197    if probe_radius <= 0:
1198        msg = f"probe_radius must be positive, got {probe_radius}"
1199        raise ValueError(msg)
1200    if n_threads < 0:
1201        msg = f"n_threads must be non-negative, got {n_threads}"
1202        raise ValueError(msg)
1203
1204    # Map classifier
1205    classifier_int = int(classifier) if classifier is not None else -1
1206
1207    # Convert paths
1208    input_dir_bytes = str(input_dir).encode("utf-8")
1209    output_dir_bytes = str(output_dir).encode("utf-8") if output_dir is not None else ffi.NULL
1210
1211    error_code = ffi.new("int*")
1212
1213    handle = lib.zsasa_batch_dir_process(
1214        input_dir_bytes,
1215        output_dir_bytes,
1216        algo_int,
1217        n_points_val,
1218        probe_radius,
1219        n_threads,
1220        classifier_int,
1221        int(include_hydrogens),
1222        int(include_hetatm),
1223        error_code,
1224    )
1225
1226    if handle == ffi.NULL:
1227        ec = error_code[0]
1228        if ec == ZSASA_ERROR_INVALID_INPUT:
1229            msg = "Invalid input parameters for directory batch processing"
1230            raise ValueError(msg)
1231        elif ec == ZSASA_ERROR_OUT_OF_MEMORY:
1232            msg = "Out of memory during directory batch processing"
1233            raise MemoryError(msg)
1234        elif ec == ZSASA_ERROR_CALCULATION:
1235            msg = "SASA calculation failed during directory batch processing"
1236            raise RuntimeError(msg)
1237        elif ec == ZSASA_ERROR_FILE_IO:
1238            msg = f"Directory not found or not readable: {input_dir}"
1239            raise FileNotFoundError(msg)
1240        else:
1241            msg = f"Directory batch processing failed with error code: {ec}"
1242            raise RuntimeError(msg)
1243
1244    try:
1245        total_files = lib.zsasa_batch_dir_get_total_files(handle)
1246        successful = lib.zsasa_batch_dir_get_successful(handle)
1247        failed = lib.zsasa_batch_dir_get_failed(handle)
1248
1249        filenames: list[str] = []
1250        n_atoms_list: list[int] = []
1251        total_sasa_list: list[float] = []
1252        status_list: list[int] = []
1253
1254        for i in range(total_files):
1255            fname_ptr = lib.zsasa_batch_dir_get_filename(handle, i)
1256            if fname_ptr == ffi.NULL:
1257                msg = (
1258                    f"Internal error: zsasa_batch_dir_get_filename returned NULL "
1259                    f"for index {i} (total_files={total_files})"
1260                )
1261                raise RuntimeError(msg)
1262            filenames.append(ffi.string(fname_ptr).decode("utf-8"))
1263            n_atoms_list.append(lib.zsasa_batch_dir_get_n_atoms(handle, i))
1264            sasa = lib.zsasa_batch_dir_get_total_sasa(handle, i)
1265            total_sasa_list.append(float("nan") if math.isnan(sasa) else sasa)
1266            st = lib.zsasa_batch_dir_get_status(handle, i)
1267            if st not in (0, 1):
1268                msg = f"Internal error: unexpected status {st} for file index {i} (expected 0 or 1)"
1269                raise RuntimeError(msg)
1270            status_list.append(st)
1271
1272        return BatchDirResult(
1273            total_files=total_files,
1274            successful=successful,
1275            failed=failed,
1276            filenames=filenames,
1277            n_atoms=n_atoms_list,
1278            total_sasa=total_sasa_list,
1279            status=status_list,
1280        )
1281    finally:
1282        lib.zsasa_batch_dir_free(handle)

Process all supported structure files in a directory for SASA calculation.

Supported formats: PDB (.pdb), mmCIF (.cif, .mmcif), PDB/ENT (.ent), JSON (.json), and their gzip-compressed variants (.gz).

Args: input_dir: Path to directory containing structure files. output_dir: Optional path for per-file output. None = no file output. algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). n_points: Number of test points per atom (SR only; ignored for LR). Default: 100. n_slices: Number of slices per atom (LR only; ignored for SR). Default: 20. probe_radius: Water probe radius in Angstroms. Default: 1.4. n_threads: Number of threads to use. 0 = auto-detect. Default: 0. classifier: Classifier for radius assignment. None = use input radii. Default: ClassifierType.PROTOR. include_hydrogens: Whether to include hydrogen atoms. Default: False. include_hetatm: Whether to include HETATM records. Default: False.

Returns: BatchDirResult with per-file details.

Raises: ValueError: If input parameters are invalid. FileNotFoundError: If the input directory does not exist. MemoryError: If out of memory. RuntimeError: For other processing errors.

Example:

from zsasa import process_directory result = process_directory("path/to/pdbs/") print(f"Processed {result.successful}/{result.total_files} files")

@dataclass
class BatchDirResult:
1083@dataclass
1084class BatchDirResult:
1085    """Result of directory batch processing.
1086
1087    Attributes:
1088        total_files: Number of supported structure files found in the directory.
1089        successful: Number of successfully processed files.
1090        failed: Number of files that failed processing.
1091        filenames: List of filenames (file name only, without directory path).
1092        n_atoms: Per-file atom counts.
1093        total_sasa: Per-file total SASA in Angstroms² (NaN for failed files).
1094        status: Per-file status (1=ok, 0=failed).
1095    """
1096
1097    total_files: int
1098    successful: int
1099    failed: int
1100    filenames: list[str]
1101    n_atoms: list[int]
1102    total_sasa: list[float]
1103    status: list[int]
1104
1105    def __post_init__(self) -> None:
1106        n = len(self.filenames)
1107        if len(self.n_atoms) != n or len(self.total_sasa) != n or len(self.status) != n:
1108            msg = (
1109                f"All per-file lists must have the same length as filenames ({n}), "
1110                f"got n_atoms={len(self.n_atoms)}, total_sasa={len(self.total_sasa)}, "
1111                f"status={len(self.status)}"
1112            )
1113            raise ValueError(msg)
1114        if self.total_files != n:
1115            msg = f"total_files ({self.total_files}) != len(filenames) ({n})"
1116            raise ValueError(msg)
1117        if self.successful + self.failed != self.total_files:
1118            msg = (
1119                f"successful ({self.successful}) + failed ({self.failed}) "
1120                f"!= total_files ({self.total_files})"
1121            )
1122            raise ValueError(msg)
1123
1124    def __repr__(self) -> str:
1125        return (
1126            f"BatchDirResult(total_files={self.total_files}, "
1127            f"successful={self.successful}, failed={self.failed})"
1128        )

Result of directory batch processing.

Attributes: total_files: Number of supported structure files found in the directory. successful: Number of successfully processed files. failed: Number of files that failed processing. filenames: List of filenames (file name only, without directory path). n_atoms: Per-file atom counts. total_sasa: Per-file total SASA in Angstroms² (NaN for failed files). status: Per-file status (1=ok, 0=failed).

BatchDirResult( total_files: int, successful: int, failed: int, filenames: list[str], n_atoms: list[int], total_sasa: list[float], status: list[int])
total_files: int
successful: int
failed: int
filenames: list[str]
n_atoms: list[int]
total_sasa: list[float]
status: list[int]
class ClassifierType(enum.IntEnum):
446class ClassifierType(IntEnum):
447    """Available classifier types for atom radius assignment.
448
449    Attributes:
450        NACCESS: NACCESS-compatible radii (default, most commonly used).
451        PROTOR: ProtOr radii based on hybridization state.
452        OONS: Ooi, Oobatake, Nemethy, Scheraga radii (older FreeSASA default).
453    """
454
455    NACCESS = ZSASA_CLASSIFIER_NACCESS
456    PROTOR = ZSASA_CLASSIFIER_PROTOR
457    OONS = ZSASA_CLASSIFIER_OONS

Available classifier types for atom radius assignment.

Attributes: NACCESS: NACCESS-compatible radii (default, most commonly used). PROTOR: ProtOr radii based on hybridization state. OONS: Ooi, Oobatake, Nemethy, Scheraga radii (older FreeSASA default).

NACCESS = <ClassifierType.NACCESS: 0>
PROTOR = <ClassifierType.PROTOR: 1>
OONS = <ClassifierType.OONS: 2>
class AtomClass(enum.IntEnum):
460class AtomClass(IntEnum):
461    """Atom polarity classes.
462
463    Attributes:
464        POLAR: Polar atoms (e.g., N, O).
465        APOLAR: Apolar/hydrophobic atoms (e.g., C).
466        UNKNOWN: Unknown classification.
467    """
468
469    POLAR = ZSASA_ATOM_CLASS_POLAR
470    APOLAR = ZSASA_ATOM_CLASS_APOLAR
471    UNKNOWN = ZSASA_ATOM_CLASS_UNKNOWN

Atom polarity classes.

Attributes: POLAR: Polar atoms (e.g., N, O). APOLAR: Apolar/hydrophobic atoms (e.g., C). UNKNOWN: Unknown classification.

POLAR = <AtomClass.POLAR: 0>
APOLAR = <AtomClass.APOLAR: 1>
UNKNOWN = <AtomClass.UNKNOWN: 2>
@dataclass
class ClassificationResult:
595@dataclass
596class ClassificationResult:
597    """Result of batch atom classification.
598
599    Attributes:
600        radii: Per-atom van der Waals radii in Angstroms.
601               NaN values indicate atoms not found in the classifier.
602               Use np.isnan(result.radii) to find unknown atoms.
603        classes: Per-atom polarity classes (AtomClass constants).
604
605    Example:
606        >>> result = classify_atoms(residues, atoms)
607        >>> unknown_mask = np.isnan(result.radii)
608        >>> if unknown_mask.any():
609        ...     print(f"Found {unknown_mask.sum()} unknown atoms")
610    """
611
612    radii: NDArray[np.float64]
613    classes: NDArray[np.int32]
614
615    def __repr__(self) -> str:
616        return f"ClassificationResult(n_atoms={len(self.radii)})"

Result of batch atom classification.

Attributes: radii: Per-atom van der Waals radii in Angstroms. NaN values indicate atoms not found in the classifier. Use np.isnan(result.radii) to find unknown atoms. classes: Per-atom polarity classes (AtomClass constants).

Example:

result = classify_atoms(residues, atoms) unknown_mask = np.isnan(result.radii) if unknown_mask.any(): ... print(f"Found {unknown_mask.sum()} unknown atoms")

ClassificationResult( radii: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]], classes: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.int32]])
radii: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]]
classes: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.int32]]
def get_radius( residue: str, atom: str, classifier_type: ClassifierType = <ClassifierType.NACCESS: 0>) -> float | None:
479def get_radius(
480    residue: str,
481    atom: str,
482    classifier_type: ClassifierType = ClassifierType.NACCESS,
483) -> float | None:
484    """Get van der Waals radius for an atom using the specified classifier.
485
486    Args:
487        residue: Residue name (e.g., "ALA", "GLY").
488        atom: Atom name (e.g., "CA", "CB").
489        classifier_type: Classifier to use. Default: ClassifierType.NACCESS.
490
491    Returns:
492        Radius in Angstroms, or None if atom is not found in classifier.
493
494    Example:
495        >>> from zsasa import get_radius, ClassifierType
496        >>> get_radius("ALA", "CA")
497        1.87
498        >>> get_radius("ALA", "XX")  # Unknown atom
499        None
500    """
501    _, lib = _get_lib()
502    radius = lib.zsasa_classifier_get_radius(
503        classifier_type,
504        residue.encode("utf-8"),
505        atom.encode("utf-8"),
506    )
507    if np.isnan(radius):
508        return None
509    return radius

Get van der Waals radius for an atom using the specified classifier.

Args: residue: Residue name (e.g., "ALA", "GLY"). atom: Atom name (e.g., "CA", "CB"). classifier_type: Classifier to use. Default: ClassifierType.NACCESS.

Returns: Radius in Angstroms, or None if atom is not found in classifier.

Example:

from zsasa import get_radius, ClassifierType get_radius("ALA", "CA") 1.87 get_radius("ALA", "XX") # Unknown atom None

def get_atom_class( residue: str, atom: str, classifier_type: ClassifierType = <ClassifierType.NACCESS: 0>) -> int:
512def get_atom_class(
513    residue: str,
514    atom: str,
515    classifier_type: ClassifierType = ClassifierType.NACCESS,
516) -> int:
517    """Get atom polarity class using the specified classifier.
518
519    Args:
520        residue: Residue name (e.g., "ALA", "GLY").
521        atom: Atom name (e.g., "CA", "CB").
522        classifier_type: Classifier to use. Default: ClassifierType.NACCESS.
523
524    Returns:
525        AtomClass constant (POLAR, APOLAR, or UNKNOWN).
526
527    Example:
528        >>> from zsasa import get_atom_class, AtomClass
529        >>> get_atom_class("ALA", "CA") == AtomClass.APOLAR
530        True
531        >>> get_atom_class("ALA", "O") == AtomClass.POLAR
532        True
533    """
534    _, lib = _get_lib()
535    return lib.zsasa_classifier_get_class(
536        classifier_type,
537        residue.encode("utf-8"),
538        atom.encode("utf-8"),
539    )

Get atom polarity class using the specified classifier.

Args: residue: Residue name (e.g., "ALA", "GLY"). atom: Atom name (e.g., "CA", "CB"). classifier_type: Classifier to use. Default: ClassifierType.NACCESS.

Returns: AtomClass constant (POLAR, APOLAR, or UNKNOWN).

Example:

from zsasa import get_atom_class, AtomClass get_atom_class("ALA", "CA") == AtomClass.APOLAR True get_atom_class("ALA", "O") == AtomClass.POLAR True

def guess_radius(element: str) -> float | None:
542def guess_radius(element: str) -> float | None:
543    """Guess van der Waals radius from element symbol.
544
545    Args:
546        element: Element symbol (e.g., "C", "N", "FE").
547                 Case-insensitive, whitespace is trimmed.
548
549    Returns:
550        Radius in Angstroms, or None if element is not recognized.
551
552    Example:
553        >>> from zsasa import guess_radius
554        >>> guess_radius("C")
555        1.7
556        >>> guess_radius("FE")
557        1.26
558        >>> guess_radius("XX")  # Unknown
559        None
560    """
561    _, lib = _get_lib()
562    radius = lib.zsasa_guess_radius(element.encode("utf-8"))
563    if np.isnan(radius):
564        return None
565    return radius

Guess van der Waals radius from element symbol.

Args: element: Element symbol (e.g., "C", "N", "FE"). Case-insensitive, whitespace is trimmed.

Returns: Radius in Angstroms, or None if element is not recognized.

Example:

from zsasa import guess_radius guess_radius("C") 1.7 guess_radius("FE") 1.26 guess_radius("XX") # Unknown None

def guess_radius_from_atom_name(atom_name: str) -> float | None:
568def guess_radius_from_atom_name(atom_name: str) -> float | None:
569    """Guess van der Waals radius from PDB-style atom name.
570
571    Extracts element symbol from atom name following PDB conventions:
572    - Leading space indicates single-char element (e.g., " CA " = Carbon alpha)
573    - No leading space may indicate 2-char element (e.g., "FE  " = Iron)
574
575    Args:
576        atom_name: PDB-style atom name (e.g., " CA ", "FE  ").
577
578    Returns:
579        Radius in Angstroms, or None if element cannot be determined.
580
581    Example:
582        >>> from zsasa import guess_radius_from_atom_name
583        >>> guess_radius_from_atom_name(" CA ")  # Carbon alpha
584        1.7
585        >>> guess_radius_from_atom_name("FE  ")  # Iron
586        1.26
587    """
588    _, lib = _get_lib()
589    radius = lib.zsasa_guess_radius_from_atom_name(atom_name.encode("utf-8"))
590    if np.isnan(radius):
591        return None
592    return radius

Guess van der Waals radius from PDB-style atom name.

Extracts element symbol from atom name following PDB conventions:

  • Leading space indicates single-char element (e.g., " CA " = Carbon alpha)
  • No leading space may indicate 2-char element (e.g., "FE " = Iron)

Args: atom_name: PDB-style atom name (e.g., " CA ", "FE ").

Returns: Radius in Angstroms, or None if element cannot be determined.

Example:

from zsasa import guess_radius_from_atom_name guess_radius_from_atom_name(" CA ") # Carbon alpha 1.7 guess_radius_from_atom_name("FE ") # Iron 1.26

def classify_atoms( residues: list[str], atoms: list[str], classifier_type: ClassifierType = <ClassifierType.NACCESS: 0>, *, include_classes: bool = True) -> ClassificationResult:
619def classify_atoms(
620    residues: list[str],
621    atoms: list[str],
622    classifier_type: ClassifierType = ClassifierType.NACCESS,
623    *,
624    include_classes: bool = True,
625) -> ClassificationResult:
626    """Classify multiple atoms at once (batch operation).
627
628    This is more efficient than calling get_radius for each atom individually.
629
630    Args:
631        residues: List of residue names.
632        atoms: List of atom names (must be same length as residues).
633        classifier_type: Classifier to use. Default: ClassifierType.NACCESS.
634        include_classes: Whether to compute atom classes. Default: True.
635
636    Returns:
637        ClassificationResult with radii and classes arrays.
638        Unknown atoms have NaN radius and UNKNOWN class.
639
640    Raises:
641        ValueError: If residues and atoms have different lengths.
642
643    Example:
644        >>> from zsasa import classify_atoms
645        >>> result = classify_atoms(
646        ...     ["ALA", "ALA", "GLY"],
647        ...     ["CA", "O", "N"],
648        ... )
649        >>> result.radii
650        array([1.87, 1.4 , 1.65])
651    """
652    ffi, lib = _get_lib()
653
654    if len(residues) != len(atoms):
655        msg = f"residues and atoms must have same length: {len(residues)} != {len(atoms)}"
656        raise ValueError(msg)
657
658    n_atoms = len(residues)
659    if n_atoms == 0:
660        return ClassificationResult(
661            radii=np.array([], dtype=np.float64),
662            classes=np.array([], dtype=np.int32),
663        )
664
665    # Encode strings and create cffi arrays
666    # Keep references to prevent garbage collection
667    residues_bytes = [ffi.new("char[]", r.encode("utf-8")) for r in residues]
668    atoms_bytes = [ffi.new("char[]", a.encode("utf-8")) for a in atoms]
669
670    residues_arr = ffi.new("char*[]", residues_bytes)
671    atoms_arr = ffi.new("char*[]", atoms_bytes)
672
673    # Allocate output arrays
674    radii = np.zeros(n_atoms, dtype=np.float64)
675    classes = np.zeros(n_atoms, dtype=np.int32) if include_classes else None
676
677    radii_ptr = ffi.cast("double*", radii.ctypes.data)
678    classes_ptr = ffi.cast("int*", classes.ctypes.data) if include_classes else ffi.NULL
679
680    result = lib.zsasa_classify_atoms(
681        classifier_type,
682        residues_arr,
683        atoms_arr,
684        n_atoms,
685        radii_ptr,
686        classes_ptr,
687    )
688
689    if result == ZSASA_ERROR_INVALID_INPUT:
690        msg = f"Invalid classifier type: {classifier_type}"
691        raise ValueError(msg)
692    elif result != ZSASA_OK:
693        msg = f"Classification error: {result}"
694        raise RuntimeError(msg)
695
696    if not include_classes:
697        classes = np.full(n_atoms, AtomClass.UNKNOWN, dtype=np.int32)
698
699    return ClassificationResult(radii=radii, classes=classes)

Classify multiple atoms at once (batch operation).

This is more efficient than calling get_radius for each atom individually.

Args: residues: List of residue names. atoms: List of atom names (must be same length as residues). classifier_type: Classifier to use. Default: ClassifierType.NACCESS. include_classes: Whether to compute atom classes. Default: True.

Returns: ClassificationResult with radii and classes arrays. Unknown atoms have NaN radius and UNKNOWN class.

Raises: ValueError: If residues and atoms have different lengths.

Example:

from zsasa import classify_atoms result = classify_atoms( ... ["ALA", "ALA", "GLY"], ... ["CA", "O", "N"], ... ) result.radii array([1.87, 1.4 , 1.65])

MAX_SASA = {'ALA': 129.0, 'ARG': 274.0, 'ASN': 195.0, 'ASP': 193.0, 'CYS': 167.0, 'GLN': 225.0, 'GLU': 223.0, 'GLY': 104.0, 'HIS': 224.0, 'ILE': 197.0, 'LEU': 201.0, 'LYS': 236.0, 'MET': 224.0, 'PHE': 240.0, 'PRO': 159.0, 'SER': 155.0, 'THR': 172.0, 'TRP': 285.0, 'TYR': 263.0, 'VAL': 174.0}
def get_max_sasa(residue_name: str) -> float | None:
731def get_max_sasa(residue_name: str) -> float | None:
732    """Get maximum SASA value for a standard amino acid.
733
734    Values from Tien et al. (2013) "Maximum allowed solvent accessibilities
735    of residues in proteins".
736
737    Args:
738        residue_name: 3-letter residue code (e.g., "ALA", "GLY").
739
740    Returns:
741        Maximum SASA in Angstroms², or None if residue is not a standard amino acid.
742
743    Example:
744        >>> from zsasa import get_max_sasa
745        >>> get_max_sasa("ALA")
746        129.0
747        >>> get_max_sasa("TRP")
748        285.0
749        >>> get_max_sasa("HOH")  # Water - not a standard amino acid
750        None
751    """
752    _, lib = _get_lib()
753    max_sasa = lib.zsasa_get_max_sasa(residue_name.encode("utf-8"))
754    if np.isnan(max_sasa):
755        return None
756    return max_sasa

Get maximum SASA value for a standard amino acid.

Values from Tien et al. (2013) "Maximum allowed solvent accessibilities of residues in proteins".

Args: residue_name: 3-letter residue code (e.g., "ALA", "GLY").

Returns: Maximum SASA in Angstroms², or None if residue is not a standard amino acid.

Example:

from zsasa import get_max_sasa get_max_sasa("ALA") 129.0 get_max_sasa("TRP") 285.0 get_max_sasa("HOH") # Water - not a standard amino acid None

def calculate_rsa(sasa: float, residue_name: str) -> float | None:
759def calculate_rsa(sasa: float, residue_name: str) -> float | None:
760    """Calculate RSA (Relative Solvent Accessibility) for a single residue.
761
762    RSA = SASA / MaxSASA
763
764    Args:
765        sasa: Observed SASA value in Angstroms².
766        residue_name: 3-letter residue code (e.g., "ALA", "GLY").
767
768    Returns:
769        RSA value (typically 0.0-1.0), or None if residue is not a standard amino acid.
770        Note: RSA > 1.0 is possible for exposed terminal residues.
771
772    Example:
773        >>> from zsasa import calculate_rsa
774        >>> calculate_rsa(64.5, "ALA")  # 64.5 / 129.0 = 0.5
775        0.5
776        >>> calculate_rsa(150.0, "GLY")  # RSA > 1.0 is possible
777        1.4423076923076923
778    """
779    _, lib = _get_lib()
780    rsa = lib.zsasa_calculate_rsa(sasa, residue_name.encode("utf-8"))
781    if np.isnan(rsa):
782        return None
783    return rsa

Calculate RSA (Relative Solvent Accessibility) for a single residue.

RSA = SASA / MaxSASA

Args: sasa: Observed SASA value in Angstroms². residue_name: 3-letter residue code (e.g., "ALA", "GLY").

Returns: RSA value (typically 0.0-1.0), or None if residue is not a standard amino acid. Note: RSA > 1.0 is possible for exposed terminal residues.

Example:

from zsasa import calculate_rsa calculate_rsa(64.5, "ALA") # 64.5 / 129.0 = 0.5 0.5 calculate_rsa(150.0, "GLY") # RSA > 1.0 is possible 1.4423076923076923

def calculate_rsa_batch( sasas: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]] | list[float], residue_names: list[str]) -> numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]]:
786def calculate_rsa_batch(
787    sasas: NDArray[np.float64] | list[float],
788    residue_names: list[str],
789) -> NDArray[np.float64]:
790    """Calculate RSA for multiple residues at once (batch operation).
791
792    This is more efficient than calling calculate_rsa for each residue individually.
793
794    Args:
795        sasas: Array of SASA values in Angstroms².
796        residue_names: List of 3-letter residue codes (must be same length as sasas).
797
798    Returns:
799        Array of RSA values. NaN values indicate non-standard amino acids.
800
801    Raises:
802        ValueError: If sasas and residue_names have different lengths.
803
804    Example:
805        >>> import numpy as np
806        >>> from zsasa import calculate_rsa_batch
807        >>> sasas = np.array([64.5, 52.0, 100.0])
808        >>> residues = ["ALA", "GLY", "HOH"]  # HOH is not standard
809        >>> rsa = calculate_rsa_batch(sasas, residues)
810        >>> rsa
811        array([0.5       , 0.5       ,        nan])
812    """
813    ffi, lib = _get_lib()
814
815    sasas = np.ascontiguousarray(sasas, dtype=np.float64)
816    n_residues = len(sasas)
817
818    if len(residue_names) != n_residues:
819        msg = f"sasas and residue_names must have same length: {n_residues} != {len(residue_names)}"
820        raise ValueError(msg)
821
822    if n_residues == 0:
823        return np.array([], dtype=np.float64)
824
825    # Encode strings and create cffi array
826    # Keep references to prevent garbage collection
827    residues_bytes = [ffi.new("char[]", r.encode("utf-8")) for r in residue_names]
828    residues_arr = ffi.new("char*[]", residues_bytes)
829
830    # Allocate output array
831    rsa_out = np.zeros(n_residues, dtype=np.float64)
832
833    sasas_ptr = ffi.cast("double*", sasas.ctypes.data)
834    rsa_ptr = ffi.cast("double*", rsa_out.ctypes.data)
835
836    result = lib.zsasa_calculate_rsa_batch(sasas_ptr, residues_arr, n_residues, rsa_ptr)
837    if result != ZSASA_OK:
838        msg = f"RSA batch calculation failed with error code: {result}"
839        raise RuntimeError(msg)
840
841    return rsa_out

Calculate RSA for multiple residues at once (batch operation).

This is more efficient than calling calculate_rsa for each residue individually.

Args: sasas: Array of SASA values in Angstroms². residue_names: List of 3-letter residue codes (must be same length as sasas).

Returns: Array of RSA values. NaN values indicate non-standard amino acids.

Raises: ValueError: If sasas and residue_names have different lengths.

Example:

import numpy as np from zsasa import calculate_rsa_batch sasas = np.array([64.5, 52.0, 100.0]) residues = ["ALA", "GLY", "HOH"] # HOH is not standard rsa = calculate_rsa_batch(sasas, residues) rsa array([0.5 , 0.5 , nan])

@dataclass
class ResidueResult:
40@dataclass
41class ResidueResult:
42    """Per-residue SASA result.
43
44    Attributes:
45        chain_id: Chain identifier (e.g., "A", "B").
46        residue_id: Residue sequence number.
47        residue_name: 3-letter residue name (e.g., "ALA", "GLY").
48        total_area: Total SASA for this residue in A^2.
49        polar_area: Polar SASA (N, O atoms, etc.) in A^2.
50        apolar_area: Apolar SASA (C atoms, etc.) in A^2.
51        rsa: Relative Solvent Accessibility (0.0-1.0+), or None for
52             non-standard amino acids.
53        n_atoms: Number of atoms in this residue.
54    """
55
56    chain_id: str
57    residue_id: int
58    residue_name: str
59    total_area: float
60    polar_area: float
61    apolar_area: float
62    rsa: float | None
63    n_atoms: int
64
65    def __repr__(self) -> str:
66        rsa_str = f"{self.rsa:.3f}" if self.rsa is not None else "None"
67        return (
68            f"ResidueResult({self.chain_id}:{self.residue_name}{self.residue_id}, "
69            f"total={self.total_area:.1f}, rsa={rsa_str}, n_atoms={self.n_atoms})"
70        )

Per-residue SASA result.

Attributes: chain_id: Chain identifier (e.g., "A", "B"). residue_id: Residue sequence number. residue_name: 3-letter residue name (e.g., "ALA", "GLY"). total_area: Total SASA for this residue in A^2. polar_area: Polar SASA (N, O atoms, etc.) in A^2. apolar_area: Apolar SASA (C atoms, etc.) in A^2. rsa: Relative Solvent Accessibility (0.0-1.0+), or None for non-standard amino acids. n_atoms: Number of atoms in this residue.

ResidueResult( chain_id: str, residue_id: int, residue_name: str, total_area: float, polar_area: float, apolar_area: float, rsa: float | None, n_atoms: int)
chain_id: str
residue_id: int
residue_name: str
total_area: float
polar_area: float
apolar_area: float
rsa: float | None
n_atoms: int
def aggregate_by_residue( atom_areas: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]], chain_ids: list[str], residue_ids: list[int], residue_names: list[str], atom_classes: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.int32]] | None = None) -> list[ResidueResult]:
 73def aggregate_by_residue(
 74    atom_areas: NDArray[np.float64],
 75    chain_ids: list[str],
 76    residue_ids: list[int],
 77    residue_names: list[str],
 78    atom_classes: NDArray[np.int32] | None = None,
 79) -> list[ResidueResult]:
 80    """Aggregate per-atom SASA values to per-residue.
 81
 82    Groups atoms by (chain_id, residue_id) and sums their SASA values.
 83    Also calculates polar/apolar breakdown and RSA if atom classes are provided.
 84
 85    Args:
 86        atom_areas: Per-atom SASA values in A^2.
 87        chain_ids: Chain ID for each atom.
 88        residue_ids: Residue sequence number for each atom.
 89        residue_names: Residue name for each atom.
 90        atom_classes: Optional per-atom polarity classes (AtomClass values).
 91                      If provided, polar_area and apolar_area will be calculated.
 92
 93    Returns:
 94        List of ResidueResult objects, one per unique residue.
 95        Results are ordered by appearance in the input (preserves chain/residue order).
 96
 97    Example:
 98        >>> import numpy as np
 99        >>> from zsasa.analysis import aggregate_by_residue
100        >>>
101        >>> atom_areas = np.array([10.0, 20.0, 15.0, 25.0])
102        >>> chain_ids = ["A", "A", "A", "A"]
103        >>> residue_ids = [1, 1, 2, 2]
104        >>> residue_names = ["ALA", "ALA", "GLY", "GLY"]
105        >>>
106        >>> residues = aggregate_by_residue(
107        ...     atom_areas, chain_ids, residue_ids, residue_names
108        ... )
109        >>> len(residues)
110        2
111        >>> residues[0].total_area
112        30.0
113    """
114    n_atoms = len(atom_areas)
115    if n_atoms == 0:
116        return []
117
118    # Validate input lengths
119    if len(chain_ids) != n_atoms:
120        msg = f"chain_ids length ({len(chain_ids)}) != atom_areas length ({n_atoms})"
121        raise ValueError(msg)
122    if len(residue_ids) != n_atoms:
123        msg = f"residue_ids length ({len(residue_ids)}) != atom_areas length ({n_atoms})"
124        raise ValueError(msg)
125    if len(residue_names) != n_atoms:
126        msg = f"residue_names length ({len(residue_names)}) != atom_areas length ({n_atoms})"
127        raise ValueError(msg)
128    if atom_classes is not None and len(atom_classes) != n_atoms:
129        msg = f"atom_classes length ({len(atom_classes)}) != atom_areas length ({n_atoms})"
130        raise ValueError(msg)
131
132    # Group atoms by (chain_id, residue_id)
133    # Use dict to preserve insertion order (Python 3.7+)
134    residue_data: dict[tuple[str, int], dict] = {}
135
136    for i in range(n_atoms):
137        key = (chain_ids[i], residue_ids[i])
138
139        if key not in residue_data:
140            residue_data[key] = {
141                "residue_name": residue_names[i],
142                "total_area": 0.0,
143                "polar_area": 0.0,
144                "apolar_area": 0.0,
145                "n_atoms": 0,
146            }
147
148        data = residue_data[key]
149        area = float(atom_areas[i])
150        data["total_area"] += area
151        data["n_atoms"] += 1
152
153        if atom_classes is not None:
154            atom_class = atom_classes[i]
155            if atom_class == AtomClass.POLAR:
156                data["polar_area"] += area
157            elif atom_class == AtomClass.APOLAR:
158                data["apolar_area"] += area
159
160    # Build result list
161    results = []
162    for (chain_id, residue_id), data in residue_data.items():
163        residue_name = data["residue_name"]
164        total_area = data["total_area"]
165
166        # Calculate RSA if this is a standard amino acid
167        max_sasa = MAX_SASA.get(residue_name)
168        rsa = total_area / max_sasa if max_sasa is not None else None
169
170        results.append(
171            ResidueResult(
172                chain_id=chain_id,
173                residue_id=residue_id,
174                residue_name=residue_name,
175                total_area=total_area,
176                polar_area=data["polar_area"],
177                apolar_area=data["apolar_area"],
178                rsa=rsa,
179                n_atoms=data["n_atoms"],
180            )
181        )
182
183    return results

Aggregate per-atom SASA values to per-residue.

Groups atoms by (chain_id, residue_id) and sums their SASA values. Also calculates polar/apolar breakdown and RSA if atom classes are provided.

Args: atom_areas: Per-atom SASA values in A^2. chain_ids: Chain ID for each atom. residue_ids: Residue sequence number for each atom. residue_names: Residue name for each atom. atom_classes: Optional per-atom polarity classes (AtomClass values). If provided, polar_area and apolar_area will be calculated.

Returns: List of ResidueResult objects, one per unique residue. Results are ordered by appearance in the input (preserves chain/residue order).

Example:

import numpy as np from zsasa.analysis import aggregate_by_residue

atom_areas = np.array([10.0, 20.0, 15.0, 25.0]) chain_ids = ["A", "A", "A", "A"] residue_ids = [1, 1, 2, 2] residue_names = ["ALA", "ALA", "GLY", "GLY"]

residues = aggregate_by_residue( ... atom_areas, chain_ids, residue_ids, residue_names ... ) len(residues) 2 residues[0].total_area 30.0

def aggregate_from_result( result: zsasa.integrations._types.SasaResultWithAtoms) -> list[ResidueResult]:
186def aggregate_from_result(result: SasaResultWithAtoms) -> list[ResidueResult]:
187    """Aggregate per-atom SASA from a SasaResultWithAtoms to per-residue.
188
189    This is a convenience wrapper that extracts the necessary data from
190    a SasaResultWithAtoms object returned by gemmi integration functions.
191
192    Args:
193        result: A SasaResultWithAtoms object from calculate_sasa_from_structure
194                or calculate_sasa_from_model.
195
196    Returns:
197        List of ResidueResult objects, one per unique residue.
198
199    Example:
200        >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure
201        >>> from zsasa.analysis import aggregate_from_result
202        >>>
203        >>> result = calculate_sasa_from_structure("protein.cif")
204        >>> residues = aggregate_from_result(result)
205        >>>
206        >>> # Print buried residues (RSA < 0.25)
207        >>> for res in residues:
208        ...     if res.rsa is not None and res.rsa < 0.25:
209        ...         print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: {res.rsa:.1%}")
210    """
211    return aggregate_by_residue(
212        atom_areas=result.atom_areas,
213        chain_ids=result.atom_data.chain_ids,
214        residue_ids=result.atom_data.residue_ids,
215        residue_names=result.atom_data.residue_names,
216        atom_classes=result.atom_classes,
217    )

Aggregate per-atom SASA from a SasaResultWithAtoms to per-residue.

This is a convenience wrapper that extracts the necessary data from a SasaResultWithAtoms object returned by gemmi integration functions.

Args: result: A SasaResultWithAtoms object from calculate_sasa_from_structure or calculate_sasa_from_model.

Returns: List of ResidueResult objects, one per unique residue.

Example:

from zsasa.integrations.gemmi import calculate_sasa_from_structure from zsasa.analysis import aggregate_from_result

result = calculate_sasa_from_structure("protein.cif") residues = aggregate_from_result(result)

Print buried residues (RSA < 0.25)

for res in residues: ... if res.rsa is not None and res.rsa < 0.25: ... print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: {res.rsa:.1%}")

def get_version() -> str:
259def get_version() -> str:
260    """Get the library version string."""
261    ffi, lib = _get_lib()
262    return ffi.string(lib.zsasa_version()).decode("utf-8")

Get the library version string.