zsasa

zsasa: Fast SASA calculation using Zig.

This package provides Python bindings for the zsasa library, a high-performance implementation of Solvent Accessible Surface Area (SASA) calculation algorithms.

Example:

import numpy as np from zsasa import calculate_sasa

Single atom

coords = np.array([[0.0, 0.0, 0.0]]) radii = np.array([1.5]) result = calculate_sasa(coords, radii) print(f"Total SASA: {result.total_area:.2f} Ų")

>>> # Classify atoms
>>> from zsasa import classify_atoms, get_radius
>>> result = classify_atoms(["ALA", "ALA"], ["CA", "O"])
>>> print(result.radii)  # [1.87, 1.4]

Integrations: For structure file support, use the gemmi integration:

>>> # pip install zsasa[gemmi]
>>> from zsasa.integrations.gemmi import calculate_sasa_from_structure
>>> result = calculate_sasa_from_structure("protein.cif")
>>> print(f"Total: {result.total_area:.1f} Ų")

Analysis: For per-residue aggregation and RSA calculation:

>>> from zsasa import aggregate_from_result
>>> from zsasa.integrations.gemmi import calculate_sasa_from_structure
>>> result = calculate_sasa_from_structure("protein.cif")
>>> residues = aggregate_from_result(result)
>>> for res in residues:
...     if res.rsa is not None:
...         print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: RSA={res.rsa:.1%}")

MDTraj Integration: For MD trajectory analysis (requires mdtraj):

>>> # pip install mdtraj
>>> from zsasa.mdtraj import compute_sasa
>>> import mdtraj as md
>>> traj = md.load('trajectory.xtc', top='topology.pdb')
>>> sasa = compute_sasa(traj)  # Returns (n_frames, n_atoms) in nm²

MDAnalysis Integration: For MD trajectory analysis with MDAnalysis (requires MDAnalysis):

>>> # pip install MDAnalysis
>>> import MDAnalysis as mda
>>> from zsasa.mdanalysis import SASAAnalysis
>>> u = mda.Universe('topology.pdb', 'trajectory.xtc')
>>> sasa = SASAAnalysis(u, select='protein')
>>> sasa.run()
>>> print(sasa.results.total_area)  # Returns per-frame SASA in Ų

Native XTC Reader: For standalone XTC reading without MDTraj/MDAnalysis dependencies:

>>> from zsasa.xtc import XtcReader, compute_sasa_trajectory
>>> # Low-level reader API
>>> with XtcReader("trajectory.xtc") as reader:
...     for frame in reader:
...         print(f"Step {frame.step}")
>>>
>>> # High-level SASA calculation (radii from topology)
>>> result = compute_sasa_trajectory("trajectory.xtc", radii)
>>> print(result.total_areas)  # Per-frame SASA in Ų

Native DCD Reader: For standalone DCD reading (NAMD/CHARMM format):

>>> from zsasa.dcd import DcdReader, compute_sasa_trajectory
>>> with DcdReader("trajectory.dcd") as reader:
...     for frame in reader:
...         print(f"Step {frame.step}, {frame.natoms} atoms")
>>>
>>> result = compute_sasa_trajectory("trajectory.dcd", radii)
>>> print(result.total_areas)  # Per-frame SASA in Ų
  1"""zsasa: Fast SASA calculation using Zig.
  2
  3This package provides Python bindings for the zsasa library,
  4a high-performance implementation of Solvent Accessible Surface Area (SASA)
  5calculation algorithms.
  6
  7Example:
  8    >>> import numpy as np
  9    >>> from zsasa import calculate_sasa
 10    >>>
 11    >>> # Single atom
 12    >>> coords = np.array([[0.0, 0.0, 0.0]])
 13    >>> radii = np.array([1.5])
 14    >>> result = calculate_sasa(coords, radii)
 15    >>> print(f"Total SASA: {result.total_area:.2f} Ų")
 16
 17    >>> # Classify atoms
 18    >>> from zsasa import classify_atoms, get_radius
 19    >>> result = classify_atoms(["ALA", "ALA"], ["CA", "O"])
 20    >>> print(result.radii)  # [1.87, 1.4]
 21
 22Integrations:
 23    For structure file support, use the gemmi integration:
 24
 25    >>> # pip install zsasa[gemmi]
 26    >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure
 27    >>> result = calculate_sasa_from_structure("protein.cif")
 28    >>> print(f"Total: {result.total_area:.1f} Ų")
 29
 30Analysis:
 31    For per-residue aggregation and RSA calculation:
 32
 33    >>> from zsasa import aggregate_from_result
 34    >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure
 35    >>> result = calculate_sasa_from_structure("protein.cif")
 36    >>> residues = aggregate_from_result(result)
 37    >>> for res in residues:
 38    ...     if res.rsa is not None:
 39    ...         print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: RSA={res.rsa:.1%}")
 40
 41MDTraj Integration:
 42    For MD trajectory analysis (requires mdtraj):
 43
 44    >>> # pip install mdtraj
 45    >>> from zsasa.mdtraj import compute_sasa
 46    >>> import mdtraj as md
 47    >>> traj = md.load('trajectory.xtc', top='topology.pdb')
 48    >>> sasa = compute_sasa(traj)  # Returns (n_frames, n_atoms) in nm²
 49
 50MDAnalysis Integration:
 51    For MD trajectory analysis with MDAnalysis (requires MDAnalysis):
 52
 53    >>> # pip install MDAnalysis
 54    >>> import MDAnalysis as mda
 55    >>> from zsasa.mdanalysis import SASAAnalysis
 56    >>> u = mda.Universe('topology.pdb', 'trajectory.xtc')
 57    >>> sasa = SASAAnalysis(u, select='protein')
 58    >>> sasa.run()
 59    >>> print(sasa.results.total_area)  # Returns per-frame SASA in Ų
 60
 61Native XTC Reader:
 62    For standalone XTC reading without MDTraj/MDAnalysis dependencies:
 63
 64    >>> from zsasa.xtc import XtcReader, compute_sasa_trajectory
 65    >>> # Low-level reader API
 66    >>> with XtcReader("trajectory.xtc") as reader:
 67    ...     for frame in reader:
 68    ...         print(f"Step {frame.step}")
 69    >>>
 70    >>> # High-level SASA calculation (radii from topology)
 71    >>> result = compute_sasa_trajectory("trajectory.xtc", radii)
 72    >>> print(result.total_areas)  # Per-frame SASA in Ų
 73
 74Native DCD Reader:
 75    For standalone DCD reading (NAMD/CHARMM format):
 76
 77    >>> from zsasa.dcd import DcdReader, compute_sasa_trajectory
 78    >>> with DcdReader("trajectory.dcd") as reader:
 79    ...     for frame in reader:
 80    ...         print(f"Step {frame.step}, {frame.natoms} atoms")
 81    >>>
 82    >>> result = compute_sasa_trajectory("trajectory.dcd", radii)
 83    >>> print(result.total_areas)  # Per-frame SASA in Ų
 84"""
 85
 86from zsasa._ffi import get_version
 87from zsasa.analysis import (
 88    ResidueResult,
 89    aggregate_by_residue,
 90    aggregate_from_result,
 91)
 92from zsasa.batch import BatchDirResult, process_directory
 93from zsasa.classifier import (
 94    AtomClass,
 95    ClassificationResult,
 96    ClassifierType,
 97    classify_atoms,
 98    get_atom_class,
 99    get_radius,
100    guess_radius,
101    guess_radius_from_atom_name,
102)
103from zsasa.rsa import (
104    MAX_SASA,
105    calculate_rsa,
106    calculate_rsa_batch,
107    get_max_sasa,
108)
109from zsasa.sasa import (
110    BatchSasaResult,
111    SasaResult,
112    calculate_sasa,
113    calculate_sasa_batch,
114)
115
116__all__ = [
117    # SASA calculation
118    "calculate_sasa",
119    "calculate_sasa_batch",
120    "SasaResult",
121    "BatchSasaResult",
122    # Batch directory processing
123    "process_directory",
124    "BatchDirResult",
125    # Classifier
126    "ClassifierType",
127    "AtomClass",
128    "ClassificationResult",
129    "get_radius",
130    "get_atom_class",
131    "guess_radius",
132    "guess_radius_from_atom_name",
133    "classify_atoms",
134    # RSA
135    "MAX_SASA",
136    "get_max_sasa",
137    "calculate_rsa",
138    "calculate_rsa_batch",
139    # Analysis
140    "ResidueResult",
141    "aggregate_by_residue",
142    "aggregate_from_result",
143    # Utility
144    "get_version",
145]
146
147__version__ = get_version()
def calculate_sasa( coords: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]], radii: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]], *, algorithm: Literal['sr', 'lr'] = 'sr', n_points: int = 100, n_slices: int = 20, probe_radius: float = 1.4, n_threads: int = 0, use_bitmask: bool = False) -> SasaResult:
 38def calculate_sasa(
 39    coords: NDArray[np.float64],
 40    radii: NDArray[np.float64],
 41    *,
 42    algorithm: Literal["sr", "lr"] = "sr",
 43    n_points: int = 100,
 44    n_slices: int = 20,
 45    probe_radius: float = 1.4,
 46    n_threads: int = 0,
 47    use_bitmask: bool = False,
 48) -> SasaResult:
 49    """Calculate Solvent Accessible Surface Area (SASA).
 50
 51    Args:
 52        coords: Atom coordinates as (N, 3) array.
 53        radii: Atom radii as (N,) array.
 54        algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards).
 55        n_points: Number of test points per atom (for SR algorithm). Default: 100.
 56        n_slices: Number of slices per atom (for LR algorithm). Default: 20.
 57        probe_radius: Water probe radius in Angstroms. Default: 1.4.
 58        n_threads: Number of threads to use. 0 = auto-detect. Default: 0.
 59        use_bitmask: Use bitmask LUT optimization for SR algorithm.
 60            Supports n_points 1..1024. Default: False.
 61
 62    Returns:
 63        SasaResult containing total_area and per-atom atom_areas.
 64
 65    Raises:
 66        ValueError: If input arrays have invalid shapes or calculation fails.
 67
 68    Example:
 69        >>> import numpy as np
 70        >>> from zsasa import calculate_sasa
 71        >>>
 72        >>> # Two atoms
 73        >>> coords = np.array([[0.0, 0.0, 0.0], [3.0, 0.0, 0.0]])
 74        >>> radii = np.array([1.5, 1.5])
 75        >>> result = calculate_sasa(coords, radii)
 76        >>> print(f"Total: {result.total_area:.2f}")
 77    """
 78    ffi, lib = _get_lib()
 79
 80    # Validate parameters
 81    if n_points <= 0:
 82        msg = f"n_points must be positive, got {n_points}"
 83        raise ValueError(msg)
 84    if n_slices <= 0:
 85        msg = f"n_slices must be positive, got {n_slices}"
 86        raise ValueError(msg)
 87    if probe_radius <= 0:
 88        msg = f"probe_radius must be positive, got {probe_radius}"
 89        raise ValueError(msg)
 90    if n_threads < 0:
 91        msg = f"n_threads must be non-negative, got {n_threads}"
 92        raise ValueError(msg)
 93
 94    # Validate bitmask constraints
 95    if use_bitmask:
 96        use_bitmask = _validate_bitmask_params(algorithm, n_points)
 97
 98    # Validate and convert inputs
 99    coords = np.ascontiguousarray(coords, dtype=np.float64)
100    radii = np.ascontiguousarray(radii, dtype=np.float64)
101
102    if coords.ndim != 2 or coords.shape[1] != 3:
103        msg = f"coords must be (N, 3) array, got shape {coords.shape}"
104        raise ValueError(msg)
105
106    n_atoms = coords.shape[0]
107    if radii.shape != (n_atoms,):
108        msg = f"radii must be ({n_atoms},) array, got shape {radii.shape}"
109        raise ValueError(msg)
110
111    if np.any(radii < 0):
112        msg = "All radii must be non-negative"
113        raise ValueError(msg)
114
115    # Extract x, y, z as contiguous arrays
116    x = np.ascontiguousarray(coords[:, 0])
117    y = np.ascontiguousarray(coords[:, 1])
118    z = np.ascontiguousarray(coords[:, 2])
119
120    # Allocate output arrays
121    atom_areas = np.zeros(n_atoms, dtype=np.float64)
122    total_area = ffi.new("double*")
123
124    # Get cffi pointers from numpy arrays
125    x_ptr = ffi.cast("double*", x.ctypes.data)
126    y_ptr = ffi.cast("double*", y.ctypes.data)
127    z_ptr = ffi.cast("double*", z.ctypes.data)
128    radii_ptr = ffi.cast("double*", radii.ctypes.data)
129    areas_ptr = ffi.cast("double*", atom_areas.ctypes.data)
130
131    # Call the appropriate function
132    if use_bitmask:
133        result = lib.zsasa_calc_sr_bitmask(
134            x_ptr,
135            y_ptr,
136            z_ptr,
137            radii_ptr,
138            n_atoms,
139            n_points,
140            probe_radius,
141            n_threads,
142            areas_ptr,
143            total_area,
144        )
145    elif algorithm == "sr":
146        result = lib.zsasa_calc_sr(
147            x_ptr,
148            y_ptr,
149            z_ptr,
150            radii_ptr,
151            n_atoms,
152            n_points,
153            probe_radius,
154            n_threads,
155            areas_ptr,
156            total_area,
157        )
158    elif algorithm == "lr":
159        result = lib.zsasa_calc_lr(
160            x_ptr,
161            y_ptr,
162            z_ptr,
163            radii_ptr,
164            n_atoms,
165            n_slices,
166            probe_radius,
167            n_threads,
168            areas_ptr,
169            total_area,
170        )
171    else:
172        msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'."
173        raise ValueError(msg)
174
175    # Check for errors
176    if result == ZSASA_ERROR_INVALID_INPUT:
177        msg = "Invalid input to SASA calculation"
178        raise ValueError(msg)
179    elif result == ZSASA_ERROR_OUT_OF_MEMORY:
180        msg = "Out of memory during SASA calculation"
181        raise MemoryError(msg)
182    elif result == ZSASA_ERROR_CALCULATION:
183        msg = "Error during SASA calculation"
184        raise RuntimeError(msg)
185    elif result == ZSASA_ERROR_UNSUPPORTED_N_POINTS:
186        msg = (
187            f"Unsupported n_points for bitmask: {n_points}. "
188            f"Must be {_BITMASK_MIN_N_POINTS}..{_BITMASK_MAX_N_POINTS}"
189        )
190        raise ValueError(msg)
191    elif result != ZSASA_OK:
192        msg = f"Unknown error code: {result}"
193        raise RuntimeError(msg)
194
195    return SasaResult(
196        total_area=total_area[0],
197        atom_areas=atom_areas,
198    )

Calculate Solvent Accessible Surface Area (SASA).

Args: coords: Atom coordinates as (N, 3) array. radii: Atom radii as (N,) array. algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). n_points: Number of test points per atom (for SR algorithm). Default: 100. n_slices: Number of slices per atom (for LR algorithm). Default: 20. probe_radius: Water probe radius in Angstroms. Default: 1.4. n_threads: Number of threads to use. 0 = auto-detect. Default: 0. use_bitmask: Use bitmask LUT optimization for SR algorithm. Supports n_points 1..1024. Default: False.

Returns: SasaResult containing total_area and per-atom atom_areas.

Raises: ValueError: If input arrays have invalid shapes or calculation fails.

Example:

import numpy as np from zsasa import calculate_sasa

Two atoms

coords = np.array([[0.0, 0.0, 0.0], [3.0, 0.0, 0.0]]) radii = np.array([1.5, 1.5]) result = calculate_sasa(coords, radii) print(f"Total: {result.total_area:.2f}")

def calculate_sasa_batch( coordinates: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.floating]], radii: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.floating]], *, algorithm: Literal['sr', 'lr'] = 'sr', n_points: int = 100, n_slices: int = 20, probe_radius: float = 1.4, n_threads: int = 0, precision: Literal['f64', 'f32'] = 'f64', use_bitmask: bool = False) -> BatchSasaResult:
231def calculate_sasa_batch(
232    coordinates: NDArray[np.floating],
233    radii: NDArray[np.floating],
234    *,
235    algorithm: Literal["sr", "lr"] = "sr",
236    n_points: int = 100,
237    n_slices: int = 20,
238    probe_radius: float = 1.4,
239    n_threads: int = 0,
240    precision: Literal["f64", "f32"] = "f64",
241    use_bitmask: bool = False,
242) -> BatchSasaResult:
243    """Calculate SASA for multiple frames (batch processing).
244
245    Optimized for MD trajectory analysis where the same atoms are processed
246    across multiple frames. Parallelizes across frames for maximum performance.
247
248    Args:
249        coordinates: Atom coordinates as (n_frames, n_atoms, 3) array in Angstroms.
250        radii: Atom radii as (n_atoms,) array in Angstroms.
251        algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards).
252        n_points: Number of test points per atom (for SR algorithm). Default: 100.
253        n_slices: Number of slices per atom (for LR algorithm). Default: 20.
254        probe_radius: Water probe radius in Angstroms. Default: 1.4.
255        n_threads: Number of threads to use. 0 = auto-detect. Default: 0.
256        precision: Internal calculation precision: "f64" (default, higher precision)
257            or "f32" (matches RustSASA/mdsasa-bolt for comparison). Default: "f64".
258        use_bitmask: Use bitmask LUT optimization for SR algorithm.
259            Supports n_points 1..1024. Default: False.
260
261    Returns:
262        BatchSasaResult containing per-atom SASA for all frames.
263
264    Raises:
265        ValueError: If input arrays have invalid shapes or calculation fails.
266
267    Example:
268        >>> import numpy as np
269        >>> from zsasa import calculate_sasa_batch
270        >>>
271        >>> # 10 frames, 100 atoms
272        >>> coords = np.random.randn(10, 100, 3).astype(np.float32)
273        >>> radii = np.full(100, 1.5, dtype=np.float32)
274        >>> result = calculate_sasa_batch(coords, radii)
275        >>> print(f"Shape: {result.atom_areas.shape}")  # (10, 100)
276        >>> print(f"Total SASA per frame: {result.total_areas}")
277    """
278    ffi, lib = _get_lib()
279
280    # Validate parameters
281    if n_points <= 0:
282        msg = f"n_points must be positive, got {n_points}"
283        raise ValueError(msg)
284    if n_slices <= 0:
285        msg = f"n_slices must be positive, got {n_slices}"
286        raise ValueError(msg)
287    if probe_radius <= 0:
288        msg = f"probe_radius must be positive, got {probe_radius}"
289        raise ValueError(msg)
290    if n_threads < 0:
291        msg = f"n_threads must be non-negative, got {n_threads}"
292        raise ValueError(msg)
293
294    # Validate bitmask constraints
295    if use_bitmask:
296        use_bitmask = _validate_bitmask_params(algorithm, n_points)
297
298    # Validate and convert inputs
299    coordinates = np.ascontiguousarray(coordinates, dtype=np.float32)
300    radii = np.ascontiguousarray(radii, dtype=np.float32)
301
302    if coordinates.ndim != 3 or coordinates.shape[2] != 3:
303        msg = f"coordinates must be (n_frames, n_atoms, 3) array, got shape {coordinates.shape}"
304        raise ValueError(msg)
305
306    n_frames = coordinates.shape[0]
307    n_atoms = coordinates.shape[1]
308
309    if radii.shape != (n_atoms,):
310        msg = f"radii must be ({n_atoms},) array, got shape {radii.shape}"
311        raise ValueError(msg)
312
313    if np.any(radii < 0):
314        msg = "All radii must be non-negative"
315        raise ValueError(msg)
316
317    # Allocate output array
318    atom_areas = np.zeros((n_frames, n_atoms), dtype=np.float32)
319
320    # Get cffi pointers from numpy arrays
321    coords_ptr = ffi.cast("float*", coordinates.ctypes.data)
322    radii_ptr = ffi.cast("float*", radii.ctypes.data)
323    areas_ptr = ffi.cast("float*", atom_areas.ctypes.data)
324
325    # Call the appropriate batch function based on algorithm, precision, and bitmask
326    if use_bitmask:
327        if precision == "f32":
328            result = lib.zsasa_calc_sr_batch_bitmask_f32(
329                coords_ptr,
330                n_frames,
331                n_atoms,
332                radii_ptr,
333                n_points,
334                probe_radius,
335                n_threads,
336                areas_ptr,
337            )
338        else:
339            result = lib.zsasa_calc_sr_batch_bitmask(
340                coords_ptr,
341                n_frames,
342                n_atoms,
343                radii_ptr,
344                n_points,
345                probe_radius,
346                n_threads,
347                areas_ptr,
348            )
349    elif precision == "f64":
350        # Default: f32 I/O with f64 internal precision
351        if algorithm == "sr":
352            result = lib.zsasa_calc_sr_batch(
353                coords_ptr,
354                n_frames,
355                n_atoms,
356                radii_ptr,
357                n_points,
358                probe_radius,
359                n_threads,
360                areas_ptr,
361            )
362        elif algorithm == "lr":
363            result = lib.zsasa_calc_lr_batch(
364                coords_ptr,
365                n_frames,
366                n_atoms,
367                radii_ptr,
368                n_slices,
369                probe_radius,
370                n_threads,
371                areas_ptr,
372            )
373        else:
374            msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'."
375            raise ValueError(msg)
376    elif precision == "f32":
377        # Pure f32 precision (matches RustSASA/mdsasa-bolt)
378        if algorithm == "sr":
379            result = lib.zsasa_calc_sr_batch_f32(
380                coords_ptr,
381                n_frames,
382                n_atoms,
383                radii_ptr,
384                n_points,
385                probe_radius,
386                n_threads,
387                areas_ptr,
388            )
389        elif algorithm == "lr":
390            result = lib.zsasa_calc_lr_batch_f32(
391                coords_ptr,
392                n_frames,
393                n_atoms,
394                radii_ptr,
395                n_slices,
396                probe_radius,
397                n_threads,
398                areas_ptr,
399            )
400        else:
401            msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'."
402            raise ValueError(msg)
403    else:
404        msg = f"Unknown precision: {precision}. Use 'f64' or 'f32'."
405        raise ValueError(msg)
406
407    # Check for errors
408    if result == ZSASA_ERROR_INVALID_INPUT:
409        msg = "Invalid input to batch SASA calculation"
410        raise ValueError(msg)
411    elif result == ZSASA_ERROR_OUT_OF_MEMORY:
412        msg = "Out of memory during batch SASA calculation"
413        raise MemoryError(msg)
414    elif result == ZSASA_ERROR_CALCULATION:
415        msg = "Error during batch SASA calculation"
416        raise RuntimeError(msg)
417    elif result == ZSASA_ERROR_UNSUPPORTED_N_POINTS:
418        msg = (
419            f"Unsupported n_points for bitmask: {n_points}. "
420            f"Must be {_BITMASK_MIN_N_POINTS}..{_BITMASK_MAX_N_POINTS}"
421        )
422        raise ValueError(msg)
423    elif result != ZSASA_OK:
424        msg = f"Unknown error code: {result}"
425        raise RuntimeError(msg)
426
427    return BatchSasaResult(atom_areas=atom_areas)

Calculate SASA for multiple frames (batch processing).

Optimized for MD trajectory analysis where the same atoms are processed across multiple frames. Parallelizes across frames for maximum performance.

Args: coordinates: Atom coordinates as (n_frames, n_atoms, 3) array in Angstroms. radii: Atom radii as (n_atoms,) array in Angstroms. algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). n_points: Number of test points per atom (for SR algorithm). Default: 100. n_slices: Number of slices per atom (for LR algorithm). Default: 20. probe_radius: Water probe radius in Angstroms. Default: 1.4. n_threads: Number of threads to use. 0 = auto-detect. Default: 0. precision: Internal calculation precision: "f64" (default, higher precision) or "f32" (matches RustSASA/mdsasa-bolt for comparison). Default: "f64". use_bitmask: Use bitmask LUT optimization for SR algorithm. Supports n_points 1..1024. Default: False.

Returns: BatchSasaResult containing per-atom SASA for all frames.

Raises: ValueError: If input arrays have invalid shapes or calculation fails.

Example:

import numpy as np from zsasa import calculate_sasa_batch

10 frames, 100 atoms

coords = np.random.randn(10, 100, 3).astype(np.float32) radii = np.full(100, 1.5, dtype=np.float32) result = calculate_sasa_batch(coords, radii) print(f"Shape: {result.atom_areas.shape}") # (10, 100) print(f"Total SASA per frame: {result.total_areas}")

@dataclass
class SasaResult:
25@dataclass
26class SasaResult:
27    """Result of SASA calculation.
28
29    Attributes:
30        total_area: Total solvent accessible surface area in Ų.
31        atom_areas: Per-atom SASA values in Ų.
32    """
33
34    total_area: float
35    atom_areas: NDArray[np.float64]

Result of SASA calculation.

Attributes: total_area: Total solvent accessible surface area in Ų. atom_areas: Per-atom SASA values in Ų.

SasaResult( total_area: float, atom_areas: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]])
total_area: float
atom_areas: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]]
@dataclass
class BatchSasaResult:
201@dataclass
202class BatchSasaResult:
203    """Result of batch SASA calculation for multiple frames.
204
205    Attributes:
206        atom_areas: Per-atom SASA values for all frames, shape (n_frames, n_atoms).
207                    Values are in Angstrom² (Ų).
208    """
209
210    atom_areas: NDArray[np.float32]
211
212    @property
213    def n_frames(self) -> int:
214        """Number of frames."""
215        return self.atom_areas.shape[0]
216
217    @property
218    def n_atoms(self) -> int:
219        """Number of atoms."""
220        return self.atom_areas.shape[1]
221
222    @property
223    def total_areas(self) -> NDArray[np.float32]:
224        """Total SASA per frame, shape (n_frames,)."""
225        return self.atom_areas.sum(axis=1)
226
227    def __repr__(self) -> str:
228        return f"BatchSasaResult(n_frames={self.n_frames}, n_atoms={self.n_atoms})"

Result of batch SASA calculation for multiple frames.

Attributes: atom_areas: Per-atom SASA values for all frames, shape (n_frames, n_atoms). Values are in Angstrom² (Ų).

BatchSasaResult( atom_areas: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float32]])
atom_areas: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float32]]
n_frames: int
212    @property
213    def n_frames(self) -> int:
214        """Number of frames."""
215        return self.atom_areas.shape[0]

Number of frames.

n_atoms: int
217    @property
218    def n_atoms(self) -> int:
219        """Number of atoms."""
220        return self.atom_areas.shape[1]

Number of atoms.

total_areas: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float32]]
222    @property
223    def total_areas(self) -> NDArray[np.float32]:
224        """Total SASA per frame, shape (n_frames,)."""
225        return self.atom_areas.sum(axis=1)

Total SASA per frame, shape (n_frames,).

def process_directory( input_dir: str | pathlib._local.Path, *, output_dir: str | pathlib._local.Path | None = None, algorithm: Literal['sr', 'lr'] = 'sr', n_points: int = 100, n_slices: int = 20, probe_radius: float = 1.4, n_threads: int = 0, classifier: ClassifierType | None = <ClassifierType.CCD: 3>, include_hydrogens: bool = False, include_hetatm: bool = False) -> BatchDirResult:
 71def process_directory(
 72    input_dir: str | Path,
 73    *,
 74    output_dir: str | Path | None = None,
 75    algorithm: Literal["sr", "lr"] = "sr",
 76    n_points: int = 100,
 77    n_slices: int = 20,
 78    probe_radius: float = 1.4,
 79    n_threads: int = 0,
 80    classifier: ClassifierType | None = ClassifierType.CCD,
 81    include_hydrogens: bool = False,
 82    include_hetatm: bool = False,
 83) -> BatchDirResult:
 84    """Process all supported structure files in a directory for SASA calculation.
 85
 86    Supported formats: PDB (.pdb), mmCIF (.cif, .mmcif), PDB/ENT (.ent),
 87    JSON (.json), and their gzip-compressed variants (.gz).
 88
 89    Args:
 90        input_dir: Path to directory containing structure files.
 91        output_dir: Optional path for per-file output. None = no file output.
 92        algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards).
 93        n_points: Number of test points per atom (SR only; ignored for LR).
 94            Default: 100.
 95        n_slices: Number of slices per atom (LR only; ignored for SR).
 96            Default: 20.
 97        probe_radius: Water probe radius in Angstroms. Default: 1.4.
 98        n_threads: Number of threads to use. 0 = auto-detect. Default: 0.
 99        classifier: Classifier for radius assignment. None = use input radii.
100            Default: ClassifierType.CCD.
101        include_hydrogens: Whether to include hydrogen atoms. Default: False.
102        include_hetatm: Whether to include HETATM records. Default: False.
103
104    Returns:
105        BatchDirResult with per-file details.
106
107    Raises:
108        ValueError: If input parameters are invalid.
109        FileNotFoundError: If the input directory does not exist.
110        MemoryError: If out of memory.
111        RuntimeError: For other processing errors.
112
113    Example:
114        >>> from zsasa import process_directory
115        >>> result = process_directory("path/to/pdbs/")
116        >>> print(f"Processed {result.successful}/{result.total_files} files")
117    """
118    ffi, lib = _get_lib()
119
120    # Validate algorithm
121    if algorithm == "sr":
122        algo_int = ZSASA_ALGORITHM_SR
123        n_points_val = n_points
124    elif algorithm == "lr":
125        algo_int = ZSASA_ALGORITHM_LR
126        n_points_val = n_slices
127    else:
128        msg = f"Unknown algorithm: {algorithm}. Use 'sr' or 'lr'."
129        raise ValueError(msg)
130
131    if n_points <= 0:
132        msg = f"n_points must be positive, got {n_points}"
133        raise ValueError(msg)
134    if n_slices <= 0:
135        msg = f"n_slices must be positive, got {n_slices}"
136        raise ValueError(msg)
137    if probe_radius <= 0:
138        msg = f"probe_radius must be positive, got {probe_radius}"
139        raise ValueError(msg)
140    if n_threads < 0:
141        msg = f"n_threads must be non-negative, got {n_threads}"
142        raise ValueError(msg)
143
144    # Map classifier
145    classifier_int = int(classifier) if classifier is not None else -1
146
147    # Convert paths
148    input_dir_bytes = str(input_dir).encode("utf-8")
149    output_dir_bytes = str(output_dir).encode("utf-8") if output_dir is not None else ffi.NULL
150
151    error_code = ffi.new("int*")
152
153    handle = lib.zsasa_batch_dir_process(
154        input_dir_bytes,
155        output_dir_bytes,
156        algo_int,
157        n_points_val,
158        probe_radius,
159        n_threads,
160        classifier_int,
161        int(include_hydrogens),
162        int(include_hetatm),
163        error_code,
164    )
165
166    if handle == ffi.NULL:
167        ec = error_code[0]
168        if ec == ZSASA_ERROR_INVALID_INPUT:
169            msg = "Invalid input parameters for directory batch processing"
170            raise ValueError(msg)
171        elif ec == ZSASA_ERROR_OUT_OF_MEMORY:
172            msg = "Out of memory during directory batch processing"
173            raise MemoryError(msg)
174        elif ec == ZSASA_ERROR_CALCULATION:
175            msg = "SASA calculation failed during directory batch processing"
176            raise RuntimeError(msg)
177        elif ec == ZSASA_ERROR_FILE_IO:
178            msg = f"Directory not found or not readable: {input_dir}"
179            raise FileNotFoundError(msg)
180        else:
181            msg = f"Directory batch processing failed with error code: {ec}"
182            raise RuntimeError(msg)
183
184    try:
185        total_files = lib.zsasa_batch_dir_get_total_files(handle)
186        successful = lib.zsasa_batch_dir_get_successful(handle)
187        failed = lib.zsasa_batch_dir_get_failed(handle)
188
189        filenames: list[str] = []
190        n_atoms_list: list[int] = []
191        total_sasa_list: list[float] = []
192        status_list: list[int] = []
193
194        for i in range(total_files):
195            fname_ptr = lib.zsasa_batch_dir_get_filename(handle, i)
196            if fname_ptr == ffi.NULL:
197                msg = (
198                    f"Internal error: zsasa_batch_dir_get_filename returned NULL "
199                    f"for index {i} (total_files={total_files})"
200                )
201                raise RuntimeError(msg)
202            filenames.append(ffi.string(fname_ptr).decode("utf-8"))
203            n_atoms_list.append(lib.zsasa_batch_dir_get_n_atoms(handle, i))
204            sasa = lib.zsasa_batch_dir_get_total_sasa(handle, i)
205            total_sasa_list.append(float("nan") if math.isnan(sasa) else sasa)
206            st = lib.zsasa_batch_dir_get_status(handle, i)
207            if st not in (0, 1):
208                msg = f"Internal error: unexpected status {st} for file index {i} (expected 0 or 1)"
209                raise RuntimeError(msg)
210            status_list.append(st)
211
212        return BatchDirResult(
213            total_files=total_files,
214            successful=successful,
215            failed=failed,
216            filenames=filenames,
217            n_atoms=n_atoms_list,
218            total_sasa=total_sasa_list,
219            status=status_list,
220        )
221    finally:
222        lib.zsasa_batch_dir_free(handle)

Process all supported structure files in a directory for SASA calculation.

Supported formats: PDB (.pdb), mmCIF (.cif, .mmcif), PDB/ENT (.ent), JSON (.json), and their gzip-compressed variants (.gz).

Args: input_dir: Path to directory containing structure files. output_dir: Optional path for per-file output. None = no file output. algorithm: Algorithm to use: "sr" (Shrake-Rupley) or "lr" (Lee-Richards). n_points: Number of test points per atom (SR only; ignored for LR). Default: 100. n_slices: Number of slices per atom (LR only; ignored for SR). Default: 20. probe_radius: Water probe radius in Angstroms. Default: 1.4. n_threads: Number of threads to use. 0 = auto-detect. Default: 0. classifier: Classifier for radius assignment. None = use input radii. Default: ClassifierType.CCD. include_hydrogens: Whether to include hydrogen atoms. Default: False. include_hetatm: Whether to include HETATM records. Default: False.

Returns: BatchDirResult with per-file details.

Raises: ValueError: If input parameters are invalid. FileNotFoundError: If the input directory does not exist. MemoryError: If out of memory. RuntimeError: For other processing errors.

Example:

from zsasa import process_directory result = process_directory("path/to/pdbs/") print(f"Processed {result.successful}/{result.total_files} files")

@dataclass
class BatchDirResult:
23@dataclass
24class BatchDirResult:
25    """Result of directory batch processing.
26
27    Attributes:
28        total_files: Number of supported structure files found in the directory.
29        successful: Number of successfully processed files.
30        failed: Number of files that failed processing.
31        filenames: List of filenames (file name only, without directory path).
32        n_atoms: Per-file atom counts.
33        total_sasa: Per-file total SASA in Angstroms² (NaN for failed files).
34        status: Per-file status (1=ok, 0=failed).
35    """
36
37    total_files: int
38    successful: int
39    failed: int
40    filenames: list[str]
41    n_atoms: list[int]
42    total_sasa: list[float]
43    status: list[int]
44
45    def __post_init__(self) -> None:
46        n = len(self.filenames)
47        if len(self.n_atoms) != n or len(self.total_sasa) != n or len(self.status) != n:
48            msg = (
49                f"All per-file lists must have the same length as filenames ({n}), "
50                f"got n_atoms={len(self.n_atoms)}, total_sasa={len(self.total_sasa)}, "
51                f"status={len(self.status)}"
52            )
53            raise ValueError(msg)
54        if self.total_files != n:
55            msg = f"total_files ({self.total_files}) != len(filenames) ({n})"
56            raise ValueError(msg)
57        if self.successful + self.failed != self.total_files:
58            msg = (
59                f"successful ({self.successful}) + failed ({self.failed}) "
60                f"!= total_files ({self.total_files})"
61            )
62            raise ValueError(msg)
63
64    def __repr__(self) -> str:
65        return (
66            f"BatchDirResult(total_files={self.total_files}, "
67            f"successful={self.successful}, failed={self.failed})"
68        )

Result of directory batch processing.

Attributes: total_files: Number of supported structure files found in the directory. successful: Number of successfully processed files. failed: Number of files that failed processing. filenames: List of filenames (file name only, without directory path). n_atoms: Per-file atom counts. total_sasa: Per-file total SASA in Angstroms² (NaN for failed files). status: Per-file status (1=ok, 0=failed).

BatchDirResult( total_files: int, successful: int, failed: int, filenames: list[str], n_atoms: list[int], total_sasa: list[float], status: list[int])
total_files: int
successful: int
failed: int
filenames: list[str]
n_atoms: list[int]
total_sasa: list[float]
status: list[int]
class ClassifierType(enum.IntEnum):
26class ClassifierType(IntEnum):
27    """Available classifier types for atom radius assignment.
28
29    Attributes:
30        CCD: CCD-based radii (default). Hardcoded ProtOr radii + runtime CCD
31             analysis for non-standard residues via bond topology.
32        PROTOR: Alias for CCD. Kept for backward compatibility.
33        NACCESS: NACCESS-compatible radii.
34        OONS: Ooi, Oobatake, Nemethy, Scheraga radii.
35    """
36
37    NACCESS = ZSASA_CLASSIFIER_NACCESS
38    PROTOR = ZSASA_CLASSIFIER_PROTOR
39    OONS = ZSASA_CLASSIFIER_OONS
40    CCD = ZSASA_CLASSIFIER_CCD

Available classifier types for atom radius assignment.

Attributes: CCD: CCD-based radii (default). Hardcoded ProtOr radii + runtime CCD analysis for non-standard residues via bond topology. PROTOR: Alias for CCD. Kept for backward compatibility. NACCESS: NACCESS-compatible radii. OONS: Ooi, Oobatake, Nemethy, Scheraga radii.

NACCESS = <ClassifierType.NACCESS: 0>
PROTOR = <ClassifierType.PROTOR: 1>
OONS = <ClassifierType.OONS: 2>
CCD = <ClassifierType.CCD: 3>
class AtomClass(enum.IntEnum):
43class AtomClass(IntEnum):
44    """Atom polarity classes.
45
46    Attributes:
47        POLAR: Polar atoms (e.g., N, O).
48        APOLAR: Apolar/hydrophobic atoms (e.g., C).
49        UNKNOWN: Unknown classification.
50    """
51
52    POLAR = ZSASA_ATOM_CLASS_POLAR
53    APOLAR = ZSASA_ATOM_CLASS_APOLAR
54    UNKNOWN = ZSASA_ATOM_CLASS_UNKNOWN

Atom polarity classes.

Attributes: POLAR: Polar atoms (e.g., N, O). APOLAR: Apolar/hydrophobic atoms (e.g., C). UNKNOWN: Unknown classification.

POLAR = <AtomClass.POLAR: 0>
APOLAR = <AtomClass.APOLAR: 1>
UNKNOWN = <AtomClass.UNKNOWN: 2>
@dataclass
class ClassificationResult:
173@dataclass
174class ClassificationResult:
175    """Result of batch atom classification.
176
177    Attributes:
178        radii: Per-atom van der Waals radii in Angstroms.
179               NaN values indicate atoms not found in the classifier.
180               Use np.isnan(result.radii) to find unknown atoms.
181        classes: Per-atom polarity classes (AtomClass constants).
182
183    Example:
184        >>> result = classify_atoms(residues, atoms)
185        >>> unknown_mask = np.isnan(result.radii)
186        >>> if unknown_mask.any():
187        ...     print(f"Found {unknown_mask.sum()} unknown atoms")
188    """
189
190    radii: NDArray[np.float64]
191    classes: NDArray[np.int32]
192
193    def __repr__(self) -> str:
194        return f"ClassificationResult(n_atoms={len(self.radii)})"

Result of batch atom classification.

Attributes: radii: Per-atom van der Waals radii in Angstroms. NaN values indicate atoms not found in the classifier. Use np.isnan(result.radii) to find unknown atoms. classes: Per-atom polarity classes (AtomClass constants).

Example:

result = classify_atoms(residues, atoms) unknown_mask = np.isnan(result.radii) if unknown_mask.any(): ... print(f"Found {unknown_mask.sum()} unknown atoms")

ClassificationResult( radii: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]], classes: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.int32]])
radii: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]]
classes: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.int32]]
def get_radius( residue: str, atom: str, classifier_type: ClassifierType = <ClassifierType.CCD: 3>) -> float | None:
57def get_radius(
58    residue: str,
59    atom: str,
60    classifier_type: ClassifierType = ClassifierType.CCD,
61) -> float | None:
62    """Get van der Waals radius for an atom using the specified classifier.
63
64    Args:
65        residue: Residue name (e.g., "ALA", "GLY").
66        atom: Atom name (e.g., "CA", "CB").
67        classifier_type: Classifier to use. Default: ClassifierType.CCD.
68
69    Returns:
70        Radius in Angstroms, or None if atom is not found in classifier.
71
72    Example:
73        >>> from zsasa import get_radius, ClassifierType
74        >>> get_radius("ALA", "CA")
75        1.87
76        >>> get_radius("ALA", "XX")  # Unknown atom
77        None
78    """
79    _, lib = _get_lib()
80    radius = lib.zsasa_classifier_get_radius(
81        classifier_type,
82        residue.encode("utf-8"),
83        atom.encode("utf-8"),
84    )
85    if np.isnan(radius):
86        return None
87    return radius

Get van der Waals radius for an atom using the specified classifier.

Args: residue: Residue name (e.g., "ALA", "GLY"). atom: Atom name (e.g., "CA", "CB"). classifier_type: Classifier to use. Default: ClassifierType.CCD.

Returns: Radius in Angstroms, or None if atom is not found in classifier.

Example:

from zsasa import get_radius, ClassifierType get_radius("ALA", "CA") 1.87 get_radius("ALA", "XX") # Unknown atom None

def get_atom_class( residue: str, atom: str, classifier_type: ClassifierType = <ClassifierType.CCD: 3>) -> int:
 90def get_atom_class(
 91    residue: str,
 92    atom: str,
 93    classifier_type: ClassifierType = ClassifierType.CCD,
 94) -> int:
 95    """Get atom polarity class using the specified classifier.
 96
 97    Args:
 98        residue: Residue name (e.g., "ALA", "GLY").
 99        atom: Atom name (e.g., "CA", "CB").
100        classifier_type: Classifier to use. Default: ClassifierType.CCD.
101
102    Returns:
103        AtomClass constant (POLAR, APOLAR, or UNKNOWN).
104
105    Example:
106        >>> from zsasa import get_atom_class, AtomClass
107        >>> get_atom_class("ALA", "CA") == AtomClass.APOLAR
108        True
109        >>> get_atom_class("ALA", "O") == AtomClass.POLAR
110        True
111    """
112    _, lib = _get_lib()
113    return lib.zsasa_classifier_get_class(
114        classifier_type,
115        residue.encode("utf-8"),
116        atom.encode("utf-8"),
117    )

Get atom polarity class using the specified classifier.

Args: residue: Residue name (e.g., "ALA", "GLY"). atom: Atom name (e.g., "CA", "CB"). classifier_type: Classifier to use. Default: ClassifierType.CCD.

Returns: AtomClass constant (POLAR, APOLAR, or UNKNOWN).

Example:

from zsasa import get_atom_class, AtomClass get_atom_class("ALA", "CA") == AtomClass.APOLAR True get_atom_class("ALA", "O") == AtomClass.POLAR True

def guess_radius(element: str) -> float | None:
120def guess_radius(element: str) -> float | None:
121    """Guess van der Waals radius from element symbol.
122
123    Args:
124        element: Element symbol (e.g., "C", "N", "FE").
125                 Case-insensitive, whitespace is trimmed.
126
127    Returns:
128        Radius in Angstroms, or None if element is not recognized.
129
130    Example:
131        >>> from zsasa import guess_radius
132        >>> guess_radius("C")
133        1.7
134        >>> guess_radius("FE")
135        1.26
136        >>> guess_radius("XX")  # Unknown
137        None
138    """
139    _, lib = _get_lib()
140    radius = lib.zsasa_guess_radius(element.encode("utf-8"))
141    if np.isnan(radius):
142        return None
143    return radius

Guess van der Waals radius from element symbol.

Args: element: Element symbol (e.g., "C", "N", "FE"). Case-insensitive, whitespace is trimmed.

Returns: Radius in Angstroms, or None if element is not recognized.

Example:

from zsasa import guess_radius guess_radius("C") 1.7 guess_radius("FE") 1.26 guess_radius("XX") # Unknown None

def guess_radius_from_atom_name(atom_name: str) -> float | None:
146def guess_radius_from_atom_name(atom_name: str) -> float | None:
147    """Guess van der Waals radius from PDB-style atom name.
148
149    Extracts element symbol from atom name following PDB conventions:
150    - Leading space indicates single-char element (e.g., " CA " = Carbon alpha)
151    - No leading space may indicate 2-char element (e.g., "FE  " = Iron)
152
153    Args:
154        atom_name: PDB-style atom name (e.g., " CA ", "FE  ").
155
156    Returns:
157        Radius in Angstroms, or None if element cannot be determined.
158
159    Example:
160        >>> from zsasa import guess_radius_from_atom_name
161        >>> guess_radius_from_atom_name(" CA ")  # Carbon alpha
162        1.7
163        >>> guess_radius_from_atom_name("FE  ")  # Iron
164        1.26
165    """
166    _, lib = _get_lib()
167    radius = lib.zsasa_guess_radius_from_atom_name(atom_name.encode("utf-8"))
168    if np.isnan(radius):
169        return None
170    return radius

Guess van der Waals radius from PDB-style atom name.

Extracts element symbol from atom name following PDB conventions:

  • Leading space indicates single-char element (e.g., " CA " = Carbon alpha)
  • No leading space may indicate 2-char element (e.g., "FE " = Iron)

Args: atom_name: PDB-style atom name (e.g., " CA ", "FE ").

Returns: Radius in Angstroms, or None if element cannot be determined.

Example:

from zsasa import guess_radius_from_atom_name guess_radius_from_atom_name(" CA ") # Carbon alpha 1.7 guess_radius_from_atom_name("FE ") # Iron 1.26

def classify_atoms( residues: list[str], atoms: list[str], classifier_type: ClassifierType = <ClassifierType.CCD: 3>, *, include_classes: bool = True) -> ClassificationResult:
197def classify_atoms(
198    residues: list[str],
199    atoms: list[str],
200    classifier_type: ClassifierType = ClassifierType.CCD,
201    *,
202    include_classes: bool = True,
203) -> ClassificationResult:
204    """Classify multiple atoms at once (batch operation).
205
206    This is more efficient than calling get_radius for each atom individually.
207
208    Args:
209        residues: List of residue names.
210        atoms: List of atom names (must be same length as residues).
211        classifier_type: Classifier to use. Default: ClassifierType.CCD.
212        include_classes: Whether to compute atom classes. Default: True.
213
214    Returns:
215        ClassificationResult with radii and classes arrays.
216        Unknown atoms have NaN radius and UNKNOWN class.
217
218    Raises:
219        ValueError: If residues and atoms have different lengths.
220
221    Example:
222        >>> from zsasa import classify_atoms
223        >>> result = classify_atoms(
224        ...     ["ALA", "ALA", "GLY"],
225        ...     ["CA", "O", "N"],
226        ... )
227        >>> result.radii
228        array([1.87, 1.4 , 1.65])
229    """
230    ffi, lib = _get_lib()
231
232    if len(residues) != len(atoms):
233        msg = f"residues and atoms must have same length: {len(residues)} != {len(atoms)}"
234        raise ValueError(msg)
235
236    n_atoms = len(residues)
237    if n_atoms == 0:
238        return ClassificationResult(
239            radii=np.array([], dtype=np.float64),
240            classes=np.array([], dtype=np.int32),
241        )
242
243    # Encode strings and create cffi arrays
244    # Keep references to prevent garbage collection
245    residues_bytes = [ffi.new("char[]", r.encode("utf-8")) for r in residues]
246    atoms_bytes = [ffi.new("char[]", a.encode("utf-8")) for a in atoms]
247
248    residues_arr = ffi.new("char*[]", residues_bytes)
249    atoms_arr = ffi.new("char*[]", atoms_bytes)
250
251    # Allocate output arrays
252    radii = np.zeros(n_atoms, dtype=np.float64)
253    classes = np.zeros(n_atoms, dtype=np.int32) if include_classes else None
254
255    radii_ptr = ffi.cast("double*", radii.ctypes.data)
256    classes_ptr = ffi.cast("int*", classes.ctypes.data) if include_classes else ffi.NULL
257
258    result = lib.zsasa_classify_atoms(
259        classifier_type,
260        residues_arr,
261        atoms_arr,
262        n_atoms,
263        radii_ptr,
264        classes_ptr,
265    )
266
267    if result == ZSASA_ERROR_INVALID_INPUT:
268        msg = f"Invalid classifier type: {classifier_type}"
269        raise ValueError(msg)
270    elif result != ZSASA_OK:
271        msg = f"Classification error: {result}"
272        raise RuntimeError(msg)
273
274    if not include_classes:
275        classes = np.full(n_atoms, AtomClass.UNKNOWN, dtype=np.int32)
276
277    return ClassificationResult(radii=radii, classes=classes)

Classify multiple atoms at once (batch operation).

This is more efficient than calling get_radius for each atom individually.

Args: residues: List of residue names. atoms: List of atom names (must be same length as residues). classifier_type: Classifier to use. Default: ClassifierType.CCD. include_classes: Whether to compute atom classes. Default: True.

Returns: ClassificationResult with radii and classes arrays. Unknown atoms have NaN radius and UNKNOWN class.

Raises: ValueError: If residues and atoms have different lengths.

Example:

from zsasa import classify_atoms result = classify_atoms( ... ["ALA", "ALA", "GLY"], ... ["CA", "O", "N"], ... ) result.radii array([1.87, 1.4 , 1.65])

MAX_SASA = {'ALA': 129.0, 'ARG': 274.0, 'ASN': 195.0, 'ASP': 193.0, 'CYS': 167.0, 'GLN': 225.0, 'GLU': 223.0, 'GLY': 104.0, 'HIS': 224.0, 'ILE': 197.0, 'LEU': 201.0, 'LYS': 236.0, 'MET': 224.0, 'PHE': 240.0, 'PRO': 159.0, 'SER': 155.0, 'THR': 172.0, 'TRP': 285.0, 'TYR': 263.0, 'VAL': 174.0}
def get_max_sasa(residue_name: str) -> float | None:
36def get_max_sasa(residue_name: str) -> float | None:
37    """Get maximum SASA value for a standard amino acid.
38
39    Values from Tien et al. (2013) "Maximum allowed solvent accessibilities
40    of residues in proteins".
41
42    Args:
43        residue_name: 3-letter residue code (e.g., "ALA", "GLY").
44
45    Returns:
46        Maximum SASA in Angstroms², or None if residue is not a standard amino acid.
47
48    Example:
49        >>> from zsasa import get_max_sasa
50        >>> get_max_sasa("ALA")
51        129.0
52        >>> get_max_sasa("TRP")
53        285.0
54        >>> get_max_sasa("HOH")  # Water - not a standard amino acid
55        None
56    """
57    _, lib = _get_lib()
58    max_sasa = lib.zsasa_get_max_sasa(residue_name.encode("utf-8"))
59    if np.isnan(max_sasa):
60        return None
61    return max_sasa

Get maximum SASA value for a standard amino acid.

Values from Tien et al. (2013) "Maximum allowed solvent accessibilities of residues in proteins".

Args: residue_name: 3-letter residue code (e.g., "ALA", "GLY").

Returns: Maximum SASA in Angstroms², or None if residue is not a standard amino acid.

Example:

from zsasa import get_max_sasa get_max_sasa("ALA") 129.0 get_max_sasa("TRP") 285.0 get_max_sasa("HOH") # Water - not a standard amino acid None

def calculate_rsa(sasa: float, residue_name: str) -> float | None:
64def calculate_rsa(sasa: float, residue_name: str) -> float | None:
65    """Calculate RSA (Relative Solvent Accessibility) for a single residue.
66
67    RSA = SASA / MaxSASA
68
69    Args:
70        sasa: Observed SASA value in Angstroms².
71        residue_name: 3-letter residue code (e.g., "ALA", "GLY").
72
73    Returns:
74        RSA value (typically 0.0-1.0), or None if residue is not a standard amino acid.
75        Note: RSA > 1.0 is possible for exposed terminal residues.
76
77    Example:
78        >>> from zsasa import calculate_rsa
79        >>> calculate_rsa(64.5, "ALA")  # 64.5 / 129.0 = 0.5
80        0.5
81        >>> calculate_rsa(150.0, "GLY")  # RSA > 1.0 is possible
82        1.4423076923076923
83    """
84    _, lib = _get_lib()
85    rsa = lib.zsasa_calculate_rsa(sasa, residue_name.encode("utf-8"))
86    if np.isnan(rsa):
87        return None
88    return rsa

Calculate RSA (Relative Solvent Accessibility) for a single residue.

RSA = SASA / MaxSASA

Args: sasa: Observed SASA value in Angstroms². residue_name: 3-letter residue code (e.g., "ALA", "GLY").

Returns: RSA value (typically 0.0-1.0), or None if residue is not a standard amino acid. Note: RSA > 1.0 is possible for exposed terminal residues.

Example:

from zsasa import calculate_rsa calculate_rsa(64.5, "ALA") # 64.5 / 129.0 = 0.5 0.5 calculate_rsa(150.0, "GLY") # RSA > 1.0 is possible 1.4423076923076923

def calculate_rsa_batch( sasas: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]] | list[float], residue_names: list[str]) -> numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]]:
 91def calculate_rsa_batch(
 92    sasas: NDArray[np.float64] | list[float],
 93    residue_names: list[str],
 94) -> NDArray[np.float64]:
 95    """Calculate RSA for multiple residues at once (batch operation).
 96
 97    This is more efficient than calling calculate_rsa for each residue individually.
 98
 99    Args:
100        sasas: Array of SASA values in Angstroms².
101        residue_names: List of 3-letter residue codes (must be same length as sasas).
102
103    Returns:
104        Array of RSA values. NaN values indicate non-standard amino acids.
105
106    Raises:
107        ValueError: If sasas and residue_names have different lengths.
108
109    Example:
110        >>> import numpy as np
111        >>> from zsasa import calculate_rsa_batch
112        >>> sasas = np.array([64.5, 52.0, 100.0])
113        >>> residues = ["ALA", "GLY", "HOH"]  # HOH is not standard
114        >>> rsa = calculate_rsa_batch(sasas, residues)
115        >>> rsa
116        array([0.5       , 0.5       ,        nan])
117    """
118    ffi, lib = _get_lib()
119
120    sasas = np.ascontiguousarray(sasas, dtype=np.float64)
121    n_residues = len(sasas)
122
123    if len(residue_names) != n_residues:
124        msg = f"sasas and residue_names must have same length: {n_residues} != {len(residue_names)}"
125        raise ValueError(msg)
126
127    if n_residues == 0:
128        return np.array([], dtype=np.float64)
129
130    # Encode strings and create cffi array
131    # Keep references to prevent garbage collection
132    residues_bytes = [ffi.new("char[]", r.encode("utf-8")) for r in residue_names]
133    residues_arr = ffi.new("char*[]", residues_bytes)
134
135    # Allocate output array
136    rsa_out = np.zeros(n_residues, dtype=np.float64)
137
138    sasas_ptr = ffi.cast("double*", sasas.ctypes.data)
139    rsa_ptr = ffi.cast("double*", rsa_out.ctypes.data)
140
141    result = lib.zsasa_calculate_rsa_batch(sasas_ptr, residues_arr, n_residues, rsa_ptr)
142    if result != ZSASA_OK:
143        msg = f"RSA batch calculation failed with error code: {result}"
144        raise RuntimeError(msg)
145
146    return rsa_out

Calculate RSA for multiple residues at once (batch operation).

This is more efficient than calling calculate_rsa for each residue individually.

Args: sasas: Array of SASA values in Angstroms². residue_names: List of 3-letter residue codes (must be same length as sasas).

Returns: Array of RSA values. NaN values indicate non-standard amino acids.

Raises: ValueError: If sasas and residue_names have different lengths.

Example:

import numpy as np from zsasa import calculate_rsa_batch sasas = np.array([64.5, 52.0, 100.0]) residues = ["ALA", "GLY", "HOH"] # HOH is not standard rsa = calculate_rsa_batch(sasas, residues) rsa array([0.5 , 0.5 , nan])

@dataclass
class ResidueResult:
41@dataclass
42class ResidueResult:
43    """Per-residue SASA result.
44
45    Attributes:
46        chain_id: Chain identifier (e.g., "A", "B").
47        residue_id: Residue sequence number.
48        residue_name: 3-letter residue name (e.g., "ALA", "GLY").
49        total_area: Total SASA for this residue in A^2.
50        polar_area: Polar SASA (N, O atoms, etc.) in A^2.
51        apolar_area: Apolar SASA (C atoms, etc.) in A^2.
52        rsa: Relative Solvent Accessibility (0.0-1.0+), or None for
53             non-standard amino acids.
54        n_atoms: Number of atoms in this residue.
55    """
56
57    chain_id: str
58    residue_id: int
59    residue_name: str
60    total_area: float
61    polar_area: float
62    apolar_area: float
63    rsa: float | None
64    n_atoms: int
65
66    def __repr__(self) -> str:
67        rsa_str = f"{self.rsa:.3f}" if self.rsa is not None else "None"
68        return (
69            f"ResidueResult({self.chain_id}:{self.residue_name}{self.residue_id}, "
70            f"total={self.total_area:.1f}, rsa={rsa_str}, n_atoms={self.n_atoms})"
71        )

Per-residue SASA result.

Attributes: chain_id: Chain identifier (e.g., "A", "B"). residue_id: Residue sequence number. residue_name: 3-letter residue name (e.g., "ALA", "GLY"). total_area: Total SASA for this residue in A^2. polar_area: Polar SASA (N, O atoms, etc.) in A^2. apolar_area: Apolar SASA (C atoms, etc.) in A^2. rsa: Relative Solvent Accessibility (0.0-1.0+), or None for non-standard amino acids. n_atoms: Number of atoms in this residue.

ResidueResult( chain_id: str, residue_id: int, residue_name: str, total_area: float, polar_area: float, apolar_area: float, rsa: float | None, n_atoms: int)
chain_id: str
residue_id: int
residue_name: str
total_area: float
polar_area: float
apolar_area: float
rsa: float | None
n_atoms: int
def aggregate_by_residue( atom_areas: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.float64]], chain_ids: list[str], residue_ids: list[int], residue_names: list[str], atom_classes: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.int32]] | None = None) -> list[ResidueResult]:
 74def aggregate_by_residue(
 75    atom_areas: NDArray[np.float64],
 76    chain_ids: list[str],
 77    residue_ids: list[int],
 78    residue_names: list[str],
 79    atom_classes: NDArray[np.int32] | None = None,
 80) -> list[ResidueResult]:
 81    """Aggregate per-atom SASA values to per-residue.
 82
 83    Groups atoms by (chain_id, residue_id) and sums their SASA values.
 84    Also calculates polar/apolar breakdown and RSA if atom classes are provided.
 85
 86    Args:
 87        atom_areas: Per-atom SASA values in A^2.
 88        chain_ids: Chain ID for each atom.
 89        residue_ids: Residue sequence number for each atom.
 90        residue_names: Residue name for each atom.
 91        atom_classes: Optional per-atom polarity classes (AtomClass values).
 92                      If provided, polar_area and apolar_area will be calculated.
 93
 94    Returns:
 95        List of ResidueResult objects, one per unique residue.
 96        Results are ordered by appearance in the input (preserves chain/residue order).
 97
 98    Example:
 99        >>> import numpy as np
100        >>> from zsasa.analysis import aggregate_by_residue
101        >>>
102        >>> atom_areas = np.array([10.0, 20.0, 15.0, 25.0])
103        >>> chain_ids = ["A", "A", "A", "A"]
104        >>> residue_ids = [1, 1, 2, 2]
105        >>> residue_names = ["ALA", "ALA", "GLY", "GLY"]
106        >>>
107        >>> residues = aggregate_by_residue(
108        ...     atom_areas, chain_ids, residue_ids, residue_names
109        ... )
110        >>> len(residues)
111        2
112        >>> residues[0].total_area
113        30.0
114    """
115    n_atoms = len(atom_areas)
116    if n_atoms == 0:
117        return []
118
119    # Validate input lengths
120    if len(chain_ids) != n_atoms:
121        msg = f"chain_ids length ({len(chain_ids)}) != atom_areas length ({n_atoms})"
122        raise ValueError(msg)
123    if len(residue_ids) != n_atoms:
124        msg = f"residue_ids length ({len(residue_ids)}) != atom_areas length ({n_atoms})"
125        raise ValueError(msg)
126    if len(residue_names) != n_atoms:
127        msg = f"residue_names length ({len(residue_names)}) != atom_areas length ({n_atoms})"
128        raise ValueError(msg)
129    if atom_classes is not None and len(atom_classes) != n_atoms:
130        msg = f"atom_classes length ({len(atom_classes)}) != atom_areas length ({n_atoms})"
131        raise ValueError(msg)
132
133    # Group atoms by (chain_id, residue_id)
134    # Use dict to preserve insertion order (Python 3.7+)
135    residue_data: dict[tuple[str, int], dict] = {}
136
137    for i in range(n_atoms):
138        key = (chain_ids[i], residue_ids[i])
139
140        if key not in residue_data:
141            residue_data[key] = {
142                "residue_name": residue_names[i],
143                "total_area": 0.0,
144                "polar_area": 0.0,
145                "apolar_area": 0.0,
146                "n_atoms": 0,
147            }
148
149        data = residue_data[key]
150        area = float(atom_areas[i])
151        data["total_area"] += area
152        data["n_atoms"] += 1
153
154        if atom_classes is not None:
155            atom_class = atom_classes[i]
156            if atom_class == AtomClass.POLAR:
157                data["polar_area"] += area
158            elif atom_class == AtomClass.APOLAR:
159                data["apolar_area"] += area
160
161    # Build result list
162    results = []
163    for (chain_id, residue_id), data in residue_data.items():
164        residue_name = data["residue_name"]
165        total_area = data["total_area"]
166
167        # Calculate RSA if this is a standard amino acid
168        max_sasa = MAX_SASA.get(residue_name)
169        rsa = total_area / max_sasa if max_sasa is not None else None
170
171        results.append(
172            ResidueResult(
173                chain_id=chain_id,
174                residue_id=residue_id,
175                residue_name=residue_name,
176                total_area=total_area,
177                polar_area=data["polar_area"],
178                apolar_area=data["apolar_area"],
179                rsa=rsa,
180                n_atoms=data["n_atoms"],
181            )
182        )
183
184    return results

Aggregate per-atom SASA values to per-residue.

Groups atoms by (chain_id, residue_id) and sums their SASA values. Also calculates polar/apolar breakdown and RSA if atom classes are provided.

Args: atom_areas: Per-atom SASA values in A^2. chain_ids: Chain ID for each atom. residue_ids: Residue sequence number for each atom. residue_names: Residue name for each atom. atom_classes: Optional per-atom polarity classes (AtomClass values). If provided, polar_area and apolar_area will be calculated.

Returns: List of ResidueResult objects, one per unique residue. Results are ordered by appearance in the input (preserves chain/residue order).

Example:

import numpy as np from zsasa.analysis import aggregate_by_residue

atom_areas = np.array([10.0, 20.0, 15.0, 25.0]) chain_ids = ["A", "A", "A", "A"] residue_ids = [1, 1, 2, 2] residue_names = ["ALA", "ALA", "GLY", "GLY"]

residues = aggregate_by_residue( ... atom_areas, chain_ids, residue_ids, residue_names ... ) len(residues) 2 residues[0].total_area 30.0

def aggregate_from_result( result: zsasa.integrations._types.SasaResultWithAtoms) -> list[ResidueResult]:
187def aggregate_from_result(result: SasaResultWithAtoms) -> list[ResidueResult]:
188    """Aggregate per-atom SASA from a SasaResultWithAtoms to per-residue.
189
190    This is a convenience wrapper that extracts the necessary data from
191    a SasaResultWithAtoms object returned by gemmi integration functions.
192
193    Args:
194        result: A SasaResultWithAtoms object from calculate_sasa_from_structure
195                or calculate_sasa_from_model.
196
197    Returns:
198        List of ResidueResult objects, one per unique residue.
199
200    Example:
201        >>> from zsasa.integrations.gemmi import calculate_sasa_from_structure
202        >>> from zsasa.analysis import aggregate_from_result
203        >>>
204        >>> result = calculate_sasa_from_structure("protein.cif")
205        >>> residues = aggregate_from_result(result)
206        >>>
207        >>> # Print buried residues (RSA < 0.25)
208        >>> for res in residues:
209        ...     if res.rsa is not None and res.rsa < 0.25:
210        ...         print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: {res.rsa:.1%}")
211    """
212    return aggregate_by_residue(
213        atom_areas=result.atom_areas,
214        chain_ids=result.atom_data.chain_ids,
215        residue_ids=result.atom_data.residue_ids,
216        residue_names=result.atom_data.residue_names,
217        atom_classes=result.atom_classes,
218    )

Aggregate per-atom SASA from a SasaResultWithAtoms to per-residue.

This is a convenience wrapper that extracts the necessary data from a SasaResultWithAtoms object returned by gemmi integration functions.

Args: result: A SasaResultWithAtoms object from calculate_sasa_from_structure or calculate_sasa_from_model.

Returns: List of ResidueResult objects, one per unique residue.

Example:

from zsasa.integrations.gemmi import calculate_sasa_from_structure from zsasa.analysis import aggregate_from_result

result = calculate_sasa_from_structure("protein.cif") residues = aggregate_from_result(result)

Print buried residues (RSA < 0.25)

for res in residues: ... if res.rsa is not None and res.rsa < 0.25: ... print(f"{res.chain_id}:{res.residue_name}{res.residue_id}: {res.rsa:.1%}")

def get_version() -> str:
255def get_version() -> str:
256    """Get the library version string."""
257    ffi, lib = _get_lib()
258    return ffi.string(lib.zsasa_version()).decode("utf-8")

Get the library version string.