Skip to content

Infrastructure Module

Infrastructure and utility classes.

alienbio.infra

Infrastructure: entity base classes, I/O, serialization.

Entity

Base class for all biology objects.

Entities have a three-part structure, like a function call: a head (the entity type name, e.g., "Chemistry" or "Molecule"), args (the ordered children, i.e. contained entities), and attributes (keyword arguments carrying the semantic content).

Entities form a tree structure with bidirectional links: _parent is the link to the containing entity, _children is a dict of child entities keyed by local name, and _top is either a Dat (for root entities) or the root Entity (for non-roots).

The _top field enables O(1) access to both root() and dat(). Names are derived by walking up the parent chain until a DAT anchor is found, then building the qualified path.

Source code in src/alienbio/infra/entity.py
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
class Entity:
    """Base class for all biology objects.

    Entities have a three-part structure (like a function call):
    - head: the entity type name (e.g., "Chemistry", "Molecule")
    - args: ordered children (contained entities)
    - attributes: keyword arguments (semantic content)

    Entities form a tree structure with bidirectional links:
    - _parent: link to containing entity
    - _children: dict of child entities by local name
    - _top: either a Dat (for root entities) or the root Entity (for non-roots)

    The _top field enables O(1) access to both root() and dat().
    Names are derived by walking up the parent chain until a DAT anchor
    is found, then building the qualified path.
    """

    __slots__ = ("_local_name", "_parent", "_children", "_top", "description")

    def __init_subclass__(cls, head: Optional[str] = None, **kwargs) -> None:
        """Auto-register subclasses in the head registry.

        Args:
            head: Optional head name for serialization.
                  If not provided (or empty), uses the class name.

        Example:
            class Molecule(Entity):  # registers as "Molecule"
                pass

            class Molecule(Entity, head="Mol"):  # registers as "Mol"
                pass
        """
        super().__init_subclass__(**kwargs)
        name = head if head else cls.__name__
        register_head(name, cls)

    def __init__(
        self,
        name: str,
        *,
        parent: Optional[Entity] = None,
        dat: Optional[Dat] = None,
        description: str = "",
    ) -> None:
        """Initialize an entity.

        Args:
            name: Local name within parent's children dict
            parent: Link to containing entity (optional if dat provided)
            dat: DAT anchor to filesystem (optional if parent provided)
            description: Human-readable description

        Raises:
            ValueError: If neither parent nor dat is provided
            ValueError: If name contains spaces
            ValueError: If parent already has a child with this name
        """
        if parent is None and dat is None:
            raise ValueError(
                f"Entity {name!r} must have either a parent or a DAT anchor"
            )

        if " " in name:
            raise ValueError(
                f"Entity name {name!r} contains spaces; names must be valid identifiers"
            )

        self._local_name = name
        self._parent: Optional[Entity] = None
        self._children: Dict[str, Entity] = {}
        self.description = description

        # Set _top: Dat for root entities, root Entity for non-roots
        if dat is not None:
            self._top: Entity | Dat = dat
        else:
            # Will be set properly in set_parent()
            self._top = parent.root()  # type: ignore[union-attr]

        # Set parent (which also registers us as a child and updates _top)
        if parent is not None:
            self.set_parent(parent)

    @classmethod
    def hydrate(
        cls,
        data: dict[str, Any],
        *,
        dat: Optional[Dat] = None,
        parent: Optional[Entity] = None,
        local_name: Optional[str] = None,
    ) -> Self:
        """Create an entity instance from a dict.

        This is the standard way to convert YAML/JSON data to typed objects.
        Subclasses should override to handle their specific fields.

        Args:
            data: Dict containing entity data
            dat: DAT anchor (if this is a root entity)
            parent: Parent entity (if this is a child)
            local_name: Override the local name (defaults to data.get("name"),
                        then to the lower-cased class name)

        Returns:
            New instance of the entity class

        Example:
            mol = MoleculeImpl.hydrate({"name": "A", "bdepth": 0})
            chem = ChemistryImpl.hydrate({"molecules": {...}, "reactions": {...}})
        """
        # Resolve the name once; it is needed both for the mock DAT path
        # and for the constructor call.
        name = local_name or data.get("name", cls.__name__.lower())

        # If neither dat nor parent provided, create a mock dat
        if dat is None and parent is None:
            dat = MockDat(f"{cls.__name__.lower()}/{name}")

        # Base Entity just takes name, parent/dat, description
        return cls(
            name,
            parent=parent,
            dat=dat,
            description=data.get("description", ""),
        )

    @property
    def local_name(self) -> str:
        """Name within parent's children dict."""
        return self._local_name

    @property
    def parent(self) -> Optional[Entity]:
        """Link to containing entity."""
        return self._parent

    @property
    def children(self) -> Dict[str, Entity]:
        """Child entities by local name.

        Returns a shallow copy, so mutating the returned dict does not
        change this entity's children.
        """
        return self._children.copy()

    @property
    def head(self) -> str:
        """The entity's head (type name).

        This is the registered name used in serialization. Falls back to
        the class name when the exact class is not in the registry.
        """
        for name, registered_cls in _head_registry.items():
            if registered_cls is type(self):
                return name
        return type(self).__name__

    def attributes(self) -> Dict[str, Any]:
        """Semantic content of this entity (override in subclasses).

        Returns a dict of the entity's keyword arguments - its semantic
        content excluding head and children (args). The description is
        only included when it is non-empty.

        Subclasses should override this to include their specific fields.
        """
        result: Dict[str, Any] = {"name": self._local_name}
        if self.description:
            result["description"] = self.description
        return result

    def dat(self) -> Dat:
        """Get the DAT anchor for this entity's tree.

        O(1) operation using the _top field.
        """
        if not isinstance(self._top, Entity):
            return self._top  # I am the root (_top is a Dat)
        # _top is the root Entity, get its DAT
        return self._top._top  # type: ignore[return-value]

    def root(self) -> Entity:
        """Get the root entity (the ancestor with the DAT anchor).

        O(1) operation using the _top field.
        """
        if not isinstance(self._top, Entity):
            return self  # I am the root (_top is a Dat)
        return self._top  # Direct pointer to root

    def set_parent(self, parent: Optional[Entity]) -> None:
        """Set the parent entity.

        Handles registration/deregistration in parent's children dict.
        Updates _top for this entity and all descendants.

        If parent is None, reparents to orphan root (entities are never invalid).

        The new parent is resolved and checked for a name collision before
        anything is modified, so a failed call leaves this entity attached
        to its previous parent.

        Raises:
            ValueError: If the new parent already has a different child
                with this entity's local name
        """
        # If parent is None, reparent to orphan root instead
        if parent is None:
            from alienbio import bio
            parent = bio.io.orphan_root

        # Validate first: raising after detaching would leave the entity
        # pointing at a parent that does not list it as a child.
        occupant = parent._children.get(self._local_name)
        if occupant is not None and occupant is not self:
            raise ValueError(
                f"Parent already has child named {self._local_name!r}"
            )

        # Remove from old parent's children
        if self._parent is not None:
            self._parent._children.pop(self._local_name, None)

        # Add to new parent's children
        self._parent = parent
        parent._children[self._local_name] = self
        # Update _top for this subtree to point to new root
        self._update_top(parent.root())

    def detach(self) -> None:
        """Detach this entity from its parent.

        The entity is reparented to the orphan root and remains fully valid.
        It can be re-attached later using set_parent().

        Prints as ORPHAN:name after detaching.
        """
        from alienbio import bio
        self.set_parent(bio.io.orphan_root)

    def _update_top(self, new_root: Entity) -> None:
        """Update _top for this entity and all descendants.

        Called when reparenting to maintain the _top invariant.
        """
        # Don't update if this entity has its own DAT (is a sub-root)
        if not isinstance(self._top, Entity):
            return

        self._top = new_root
        for child in self._children.values():
            child._update_top(new_root)

    @property
    def full_name(self) -> str:
        """Full path from DAT anchor (e.g., 'runs/exp1.cytoplasm.glucose').

        Walks up the parent chain until a DAT anchor is found,
        then builds the path from there.
        """
        if not isinstance(self._top, Entity):
            return self._top.get_path_name()  # I am root, _top is Dat
        return f"{self._parent.full_name}.{self._local_name}"

    def to_dict(self, recursive: bool = False, _root: Optional[Entity] = None) -> Dict[str, Any]:
        """Convert entity to dictionary representation for serialization.

        The dict has three parts (like a function call):
        - head: the entity type name
        - args: children (contained entities) - only if present and recursive
        - **attributes: semantic content (name, description, subclass fields)

        Args:
            recursive: If True, include children recursively
            _root: Internal - the root entity we're serializing from (to detect
                   children with different roots that need absolute refs)

        Returns:
            Dict with entity fields suitable for YAML/JSON serialization.
        """
        # Start with head
        result: Dict[str, Any] = {"head": self.head}

        # Add attributes (semantic content)
        result.update(self.attributes())

        # Add args (children) if recursive and present
        if recursive and self._children:
            # Track the root entity for this serialization
            if _root is None:
                _root = self.root()

            args_dict: Dict[str, Any] = {}
            for name, child in self._children.items():
                child_root = child.root()
                if child_root is not _root:
                    # Child belongs to a different DAT - use absolute ref
                    from alienbio import bio
                    args_dict[name] = bio.io.ref(child, absolute=True)
                else:
                    # Same DAT - inline the child
                    args_dict[name] = child.to_dict(recursive=True, _root=_root)
            result["args"] = args_dict

        return result

    def to_str(self, depth: int = -1) -> str:
        """String representation of entity tree.

        Returns a function-call style representation showing the entity
        and optionally its children.

        Args:
            depth: How deep to recurse into children.
                   -1 = unlimited, 0 = just this entity,
                   1 = include immediate children, etc.

        Returns:
            String like "World(Cytoplasm(Glucose, ATP), Nucleus)"

        Example:
            entity.to_str()      # full tree
            entity.to_str(0)     # just "World"
            entity.to_str(1)     # "World(Cytoplasm, Nucleus)"
        """
        if not self._children or depth == 0:
            return self._local_name

        next_depth = -1 if depth == -1 else depth - 1
        children_str = ", ".join(
            child.to_str(next_depth) for child in self._children.values()
        )
        return f"{self._local_name}({children_str})"

    def ancestors(self) -> Iterator[Entity]:
        """Iterate over ancestors from parent to root."""
        current = self._parent
        while current is not None:
            yield current
            current = current._parent

    def descendants(self) -> Iterator[Entity]:
        """Iterate over all descendants (depth-first)."""
        for child in self._children.values():
            yield child
            yield from child.descendants()

    def save(self) -> None:
        """Save this entity tree to disk.

        Must be called on the root entity (the one with the DAT anchor).
        Serializes the entire entity tree to entities.yaml in the DAT folder,
        then calls save() on the DAT itself.

        Raises:
            ValueError: If not called on a root entity
            ValueError: If called on orphan root (orphans cannot be saved)
        """
        import yaml
        from pathlib import Path
        from .io import _OrphanDat

        if isinstance(self._top, Entity):
            raise ValueError(
                "save() must be called on root entity. "
                "Use self.root().save() instead."
            )

        if isinstance(self._top, _OrphanDat):
            raise ValueError(
                "Cannot save orphan entities - re-attach them to a real DAT first"
            )

        dat = self._top

        # Serialize the entity tree
        entity_data = self.to_dict(recursive=True)

        # Write to entities.yaml in DAT folder. The encoding is pinned so the
        # file does not depend on the platform's default locale encoding.
        dat_path = Path(dat.get_path())
        entities_file = dat_path / "entities.yaml"
        with open(entities_file, "w", encoding="utf-8") as f:
            yaml.dump(entity_data, f, default_flow_style=False, sort_keys=False)

        # Also save the DAT's spec
        dat.save()

    def __repr__(self) -> str:
        """Debug representation listing name, description, dat, parent and children.

        Optional parts are only included when they are set/non-empty.
        """
        parts = [f"name={self._local_name!r}"]
        if self.description:
            parts.append(f"description={self.description!r}")
        if not isinstance(self._top, Entity) and self._top is not None:
            parts.append(f"dat={self._top.get_path_name()!r}")
        if self._parent is not None:
            parts.append(f"parent={self._parent._local_name!r}")
        if self._children:
            parts.append(f"children={list(self._children.keys())}")
        return f"Entity({', '.join(parts)})"

    def __str__(self) -> str:
        """Short display form using PREFIX:path if IO available.

        Falls back to full_name if bio.io.ref() cannot be used, and to
        "<Entity:name>" if full_name raises ValueError.
        """
        try:
            from alienbio import bio

            return bio.io.ref(self)
        except Exception:
            # Deliberate best-effort: str() must not fail just because the
            # IO context is unavailable. Fall back to full_name.
            try:
                return self.full_name
            except ValueError:
                return f"<Entity:{self._local_name}>"

local_name property

Name within parent's children dict.

parent property

Link to containing entity.

children property

Child entities by local name. The returned dict is a shallow copy, so modifying it does not change the entity's children.

head property

The entity's head (type name).

This is the registered name used in serialization.

full_name property

Full path from DAT anchor (e.g., 'runs/exp1.cytoplasm.glucose').

Walks up the parent chain until a DAT anchor is found, then builds the path from there.

__init_subclass__(head=None, **kwargs)

Auto-register subclasses in the head registry.

Parameters:

Name Type Description Default
head Optional[str]

Optional head name for serialization. If not provided, uses the class name.

None
Example

class Molecule(Entity): # registers as "Molecule" pass

class Molecule(Entity, head="Mol"): # registers as "Mol" pass

Source code in src/alienbio/infra/entity.py
def __init_subclass__(cls, head: Optional[str] = None, **kwargs) -> None:
    """Register every new subclass in the head registry.

    Args:
        head: Name to register the subclass under for serialization.
              When omitted (or empty) the class name is used instead.

    Example:
        class Molecule(Entity):  # registers as "Molecule"
            pass

        class Molecule(Entity, head="Mol"):  # registers as "Mol"
            pass
    """
    super().__init_subclass__(**kwargs)
    # An empty or missing head falls back to the class's own name.
    registry_key = head or cls.__name__
    register_head(registry_key, cls)

__init__(name, *, parent=None, dat=None, description='')

Initialize an entity.

Parameters:

Name Type Description Default
name str

Local name within parent's children dict

required
parent Optional[Entity]

Link to containing entity (optional if dat provided)

None
dat Optional[Dat]

DAT anchor to filesystem (optional if parent provided)

None
description str

Human-readable description

''

Raises:

Type Description
ValueError

If neither parent nor dat is provided

ValueError

If name contains spaces

Source code in src/alienbio/infra/entity.py
def __init__(
    self,
    name: str,
    *,
    parent: Optional[Entity] = None,
    dat: Optional[Dat] = None,
    description: str = "",
) -> None:
    """Build an entity anchored either to a parent entity or to a DAT.

    Args:
        name: Local name; used as the key in the parent's children dict
        parent: Containing entity (may be omitted when dat is given)
        dat: DAT anchor (may be omitted when parent is given)
        description: Human-readable description

    Raises:
        ValueError: If both parent and dat are missing
        ValueError: If name contains a space character
    """
    has_anchor = parent is not None or dat is not None
    if not has_anchor:
        raise ValueError(
            f"Entity {name!r} must have either a parent or a DAT anchor"
        )

    if " " in name:
        raise ValueError(
            f"Entity name {name!r} contains spaces; names must be valid identifiers"
        )

    self._local_name = name
    self.description = description
    self._children: Dict[str, Entity] = {}
    self._parent: Optional[Entity] = None

    # _top holds the Dat when one is supplied; otherwise it starts as the
    # parent's root entity.
    self._top: Entity | Dat = (
        dat if dat is not None else parent.root()  # type: ignore[union-attr]
    )

    # Attaching goes through set_parent(), which registers this entity in
    # the parent's children dict.
    if parent is not None:
        self.set_parent(parent)

hydrate(data, *, dat=None, parent=None, local_name=None) classmethod

Create an entity instance from a dict.

This is the standard way to convert YAML/JSON data to typed objects. Subclasses should override to handle their specific fields.

Parameters:

Name Type Description Default
data dict[str, Any]

Dict containing entity data

required
dat Optional[Dat]

DAT anchor (if this is a root entity)

None
parent Optional[Entity]

Parent entity (if this is a child)

None
local_name Optional[str]

Override the local name (defaults to data.get("name"))

None

Returns:

Type Description
Self

New instance of the entity class

Example

mol = MoleculeImpl.hydrate({"name": "A", "bdepth": 0}) chem = ChemistryImpl.hydrate({"molecules": {...}, "reactions": {...}})

Source code in src/alienbio/infra/entity.py
@classmethod
def hydrate(
    cls,
    data: dict[str, Any],
    *,
    dat: Optional[Dat] = None,
    parent: Optional[Entity] = None,
    local_name: Optional[str] = None,
) -> Self:
    """Build an instance of this class from a plain dict.

    Intended as the standard conversion from YAML/JSON data to typed
    objects; subclasses override it to read their own fields.

    Args:
        data: Dict containing entity data
        dat: DAT anchor (for a root entity)
        parent: Parent entity (for a child)
        local_name: Name to use instead of data.get("name"); when neither
                    is available the lower-cased class name is used

    Returns:
        New instance of the entity class

    Example:
        mol = MoleculeImpl.hydrate({"name": "A", "bdepth": 0})
        chem = ChemistryImpl.hydrate({"molecules": {...}, "reactions": {...}})
    """
    kind = cls.__name__.lower()
    resolved_name = local_name or data.get("name", kind)

    # With no anchor at all, fabricate a mock DAT named "<class>/<name>".
    if parent is None and dat is None:
        dat = MockDat(f"{kind}/{resolved_name}")

    # The base class only consumes name, parent/dat and description.
    summary = data.get("description", "")
    return cls(resolved_name, parent=parent, dat=dat, description=summary)

attributes()

Semantic content of this entity (override in subclasses).

Returns a dict of the entity's keyword arguments - its semantic content excluding head and children (args).

Subclasses should override this to include their specific fields.

Source code in src/alienbio/infra/entity.py
def attributes(self) -> Dict[str, Any]:
    """Return this entity's semantic content as a dict.

    The result always carries "name"; "description" is added only when
    the description is non-empty. Head and children (args) are not part
    of it.

    Subclasses are expected to override this and add their own fields.
    """
    attrs: Dict[str, Any] = {"name": self._local_name}
    summary = self.description
    if not summary:
        return attrs
    attrs["description"] = summary
    return attrs

dat()

Get the DAT anchor for this entity's tree.

O(1) operation using the _top field.

Source code in src/alienbio/infra/entity.py
def dat(self) -> Dat:
    """Return the DAT anchor of the tree this entity belongs to.

    Uses only the _top field, so no walk up the parent chain is needed.
    """
    top = self._top
    if isinstance(top, Entity):
        # Non-root: _top is the root entity, whose own _top is the Dat.
        return top._top  # type: ignore[return-value]
    # Root: _top already is the Dat.
    return top

root()

Get the root entity (the ancestor with the DAT anchor).

O(1) operation using the _top field.

Source code in src/alienbio/infra/entity.py
def root(self) -> Entity:
    """Return the root entity (the ancestor holding the DAT anchor).

    Uses only the _top field, so no walk up the parent chain is needed.
    """
    # A non-root stores its root entity in _top; a root stores a Dat there
    # and is therefore its own root.
    return self._top if isinstance(self._top, Entity) else self

set_parent(parent)

Set the parent entity.

Handles registration/deregistration in parent's children dict. Updates _top for this entity and all descendants.

If parent is None, reparents to orphan root (entities are never invalid).

Source code in src/alienbio/infra/entity.py
def set_parent(self, parent: Optional[Entity]) -> None:
    """Set the parent entity.

    Handles registration/deregistration in parent's children dict.
    Updates _top for this entity and all descendants.

    If parent is None, reparents to orphan root (entities are never invalid).

    The new parent is resolved and checked for a name collision before
    anything is modified, so a failed call leaves this entity attached
    to its previous parent.

    Raises:
        ValueError: If the new parent already has a different child
            with this entity's local name
    """
    # If parent is None, reparent to orphan root instead
    if parent is None:
        from alienbio import bio
        parent = bio.io.orphan_root

    # Validate first: raising after detaching would leave the entity
    # pointing at a parent that does not list it as a child.
    occupant = parent._children.get(self._local_name)
    if occupant is not None and occupant is not self:
        raise ValueError(
            f"Parent already has child named {self._local_name!r}"
        )

    # Remove from old parent's children
    if self._parent is not None:
        self._parent._children.pop(self._local_name, None)

    # Add to new parent's children
    self._parent = parent
    parent._children[self._local_name] = self
    # Update _top for this subtree to point to new root
    self._update_top(parent.root())

detach()

Detach this entity from its parent.

The entity is reparented to the orphan root and remains fully valid. It can be re-attached later using set_parent().

Prints as ORPHAN:name after detaching.

Source code in src/alienbio/infra/entity.py
def detach(self) -> None:
    """Move this entity out of its current parent.

    The entity is handed to set_parent() with the orphan root as its new
    parent, so it stays part of a tree and can be re-attached later with
    another set_parent() call.

    Prints as ORPHAN:name after detaching.
    """
    # Imported here rather than at module level, as in the original.
    from alienbio import bio

    orphanage = bio.io.orphan_root
    self.set_parent(orphanage)

to_dict(recursive=False, _root=None)

Convert entity to dictionary representation for serialization.

The dict has three parts, like a function call: head (the entity type name), args (the children, i.e. contained entities, included only if present and recursive is set), and **attributes (the semantic content: name, description, and subclass fields).

Parameters:

Name Type Description Default
recursive bool

If True, include children recursively

False
_root Optional[Entity]

Internal - the root entity we're serializing from (to detect children with different roots that need absolute refs)

None

Returns:

Type Description
Dict[str, Any]

Dict with entity fields suitable for YAML/JSON serialization.

Source code in src/alienbio/infra/entity.py
def to_dict(self, recursive: bool = False, _root: Optional[Entity] = None) -> Dict[str, Any]:
    """Produce a serializable dict describing this entity.

    Layout (like a function call):
    - head: the entity type name
    - **attributes: semantic content (name, description, subclass fields)
    - args: children keyed by local name - only when recursive is set and
      the entity has children

    Args:
        recursive: If True, include children recursively
        _root: Internal - root entity the serialization started from; a
               child whose root() is a different entity is written as an
               absolute ref instead of being inlined

    Returns:
        Dict with entity fields suitable for YAML/JSON serialization.
    """
    # Head goes first; attributes are merged on top of it.
    payload: Dict[str, Any] = {"head": self.head, **self.attributes()}

    if not (recursive and self._children):
        return payload

    # The top-level call fixes the root used by the whole recursion.
    anchor = self.root() if _root is None else _root

    nested: Dict[str, Any] = {}
    for key, kid in self._children.items():
        if kid.root() is anchor:
            # Same DAT - inline the child
            nested[key] = kid.to_dict(recursive=True, _root=anchor)
        else:
            # Child belongs to a different DAT - use absolute ref
            from alienbio import bio
            nested[key] = bio.io.ref(kid, absolute=True)
    payload["args"] = nested

    return payload

to_str(depth=-1)

String representation of entity tree.

Returns a function-call style representation showing the entity and optionally its children.

Parameters:

Name Type Description Default
depth int

How deep to recurse into children. -1 = unlimited, 0 = just this entity, 1 = include immediate children, etc.

-1

Returns:

Type Description
str

String like "World(Cytoplasm(Glucose, ATP), Nucleus)"

Example

entity.to_str() # full tree entity.to_str(0) # just "World" entity.to_str(1) # "World(Cytoplasm, Nucleus)"

Source code in src/alienbio/infra/entity.py
def to_str(self, depth: int = -1) -> str:
    """Render the entity tree in function-call notation.

    Args:
        depth: Number of child levels to include.
               -1 = unlimited, 0 = just this entity,
               1 = include immediate children, etc.

    Returns:
        String like "World(Cytoplasm(Glucose, ATP), Nucleus)"

    Example:
        entity.to_str()      # full tree
        entity.to_str(0)     # just "World"
        entity.to_str(1)     # "World(Cytoplasm, Nucleus)"
    """
    kids = self._children
    if depth == 0 or not kids:
        return self._local_name

    # -1 means "no limit" and is passed down unchanged; any other value
    # is decremented for the next level.
    remaining = depth - 1 if depth != -1 else -1
    rendered = [kid.to_str(remaining) for kid in kids.values()]
    return f"{self._local_name}({', '.join(rendered)})"

ancestors()

Iterate over ancestors from parent to root.

Source code in src/alienbio/infra/entity.py
def ancestors(self) -> Iterator[Entity]:
    """Yield each ancestor in turn, nearest parent first.

    Stops at the first entity whose _parent is None.
    """
    node = self
    while (node := node._parent) is not None:
        yield node

descendants()

Iterate over all descendants (depth-first).

Source code in src/alienbio/infra/entity.py
def descendants(self) -> Iterator[Entity]:
    """Yield every descendant, depth-first.

    Each child is yielded before its own descendants, and children are
    visited in the iteration order of the _children dict.
    """
    kids = self._children
    for key in kids:
        node = kids[key]
        yield node
        yield from node.descendants()

save()

Save this entity tree to disk.

Must be called on the root entity (the one with the DAT anchor). Serializes the entire entity tree to entities.yaml in the DAT folder.

Raises:

Type Description
ValueError

If not called on a root entity

ValueError

If called on orphan root (orphans cannot be saved)

Source code in src/alienbio/infra/entity.py
def save(self) -> None:
    """Save this entity tree to disk.

    Must be called on the root entity (the one with the DAT anchor).
    Serializes the entire entity tree to entities.yaml in the DAT folder,
    then calls save() on the DAT itself.

    Raises:
        ValueError: If not called on a root entity
        ValueError: If called on orphan root (orphans cannot be saved)
    """
    import yaml
    from pathlib import Path
    from .io import _OrphanDat

    # A non-root entity stores its root Entity in _top instead of a Dat.
    if isinstance(self._top, Entity):
        raise ValueError(
            "save() must be called on root entity. "
            "Use self.root().save() instead."
        )

    if isinstance(self._top, _OrphanDat):
        raise ValueError(
            "Cannot save orphan entities - re-attach them to a real DAT first"
        )

    dat = self._top

    # Serialize the entity tree
    entity_data = self.to_dict(recursive=True)

    # Write to entities.yaml in DAT folder. The encoding is pinned so the
    # file does not depend on the platform's default locale encoding.
    dat_path = Path(dat.get_path())
    entities_file = dat_path / "entities.yaml"
    with open(entities_file, "w", encoding="utf-8") as f:
        yaml.dump(entity_data, f, default_flow_style=False, sort_keys=False)

    # Also save the DAT's spec
    dat.save()

__repr__()

Full reconstructible representation.

Source code in src/alienbio/infra/entity.py
def __repr__(self) -> str:
    """Return a debug string of the form "Entity(name=..., ...)".

    name is always present; description, dat, parent and children are
    appended, in that order, only when they are set/non-empty.
    """
    top = self._top
    owner = self._parent

    fields = [f"name={self._local_name!r}"]
    if self.description:
        fields.append(f"description={self.description!r}")
    # Only a root carries a Dat in _top; non-roots hold an Entity there.
    if top is not None and not isinstance(top, Entity):
        fields.append(f"dat={top.get_path_name()!r}")
    if owner is not None:
        fields.append(f"parent={owner._local_name!r}")
    if self._children:
        fields.append(f"children={list(self._children.keys())}")

    joined = ", ".join(fields)
    return f"Entity({joined})"

__str__()

Short display form using PREFIX:path if IO available.

Falls back to full_name if no IO or prefix matches.

Source code in src/alienbio/infra/entity.py
def __str__(self) -> str:
    """Return a short display string for this entity.

    Tries ``bio.io.ref(self)`` first. If that import or call raises any
    Exception, falls back to ``self.full_name``; if that in turn raises
    ValueError, falls back to ``<Entity:local_name>``.
    """
    try:
        from alienbio import bio

        text = bio.io.ref(self)
    except Exception:
        # Best effort: the IO context may be unavailable, so degrade to the
        # plain name forms rather than failing inside str().
        try:
            text = self.full_name
        except ValueError:
            text = f"<Entity:{self._local_name}>"
    return text

IO

Entity I/O: naming, formatting, lookup, and persistence.

IO handles all external representation concerns for entities: - Prefix bindings: Maps short prefixes (R:, W:) to Entity or path string - Formatting: Converts entities to PREFIX:path strings - Lookup: Converts PREFIX:path strings back to entities - Persistence: Load/save entities via DAT

The 'D:' prefix is always bound to the data root as an escape hatch.

Note: For data path, use Dat.manager.sync_folder (single source of truth).

Source code in src/alienbio/infra/io.py
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
class IO:
    """Entity I/O: naming, formatting, lookup, and persistence.

    IO handles all external representation concerns for entities:
    - Prefix bindings: Maps short prefixes (R:, W:) to Entity or path string
    - Formatting: Converts entities to PREFIX:path strings
    - Lookup: Converts PREFIX:path strings back to entities
    - Persistence: Load/save entities via DAT

    The 'D:' prefix is always bound to the data root as an escape hatch.

    Note: For data path, use Dat.manager.sync_folder (single source of truth).
    """

    def __init__(self) -> None:
        """Initialize IO."""
        self._prefixes: Dict[str, Entity | str] = {}
        self._path_entity_cache: Dict[str, Entity] = {}
        self._dat_entity_cache: Dict[str, Entity] = {}  # DAT path -> root entity
        self._root_entity: Optional[_RootEntity] = None
        self._orphan_dat: Optional[_OrphanDat] = None
        self._orphan_root: Optional[Entity] = None

    @property
    def _data_root(self) -> _RootEntity:
        """Lazy-initialized root entity for D: prefix."""
        if self._root_entity is None:
            self._root_entity = _RootEntity()
        return self._root_entity

    @property
    def orphan_root(self) -> Entity:
        """Lazy-initialized root entity for orphaned entities.

        Detached entities are re-parented here instead of becoming invalid.
        The ORPHAN: prefix is automatically bound to this root.
        """
        if self._orphan_root is None:
            from .entity import Entity
            self._orphan_dat = _OrphanDat()
            self._orphan_root = Entity("orphans", dat=self._orphan_dat)
            self._prefixes["ORPHAN"] = self._orphan_root
        return self._orphan_root

    @property
    def prefixes(self) -> Dict[str, Entity | str]:
        """Current prefix bindings (read-only copy)."""
        return self._prefixes.copy()

    def bind_prefix(self, prefix: str, target: Entity | str) -> None:
        """Bind a prefix to an entity or path string.

        Args:
            prefix: Short prefix string (e.g., "R", "W", "M")
            target: Entity to bind, or path string to DAT location

        Example:
            io.bind_prefix("W", world_entity)       # bind to Entity
            io.bind_prefix("R", "runs/experiment1") # bind to path
        """
        self._prefixes[prefix] = target

    def unbind_prefix(self, prefix: str) -> Optional[Entity | str]:
        """Remove a prefix binding.

        Args:
            prefix: Prefix to unbind

        Returns:
            The previously bound target, or None if not bound
        """
        return self._prefixes.pop(prefix, None)

    def resolve_prefix(self, prefix: str) -> Entity:
        """Get the entity bound to a prefix.

        If prefix is bound to a path string, loads/creates an Entity for it.
        The special prefix 'D' always resolves to the data root.

        Args:
            prefix: Prefix to resolve

        Returns:
            The entity bound to this prefix

        Raises:
            KeyError: If prefix is not bound
        """
        # Special case: D always resolves to data root
        if prefix == "D":
            return self._data_root

        if prefix not in self._prefixes:
            raise KeyError(f"Prefix {prefix!r} is not bound")

        target = self._prefixes[prefix]

        if isinstance(target, str):
            return self._resolve_path_to_entity(target)

        return target

    def _resolve_path_to_entity(self, path: str) -> Entity:
        """Resolve a path string to an Entity, caching the result.

        Args:
            path: Path to DAT location

        Returns:
            Entity wrapping the DAT at that path
        """
        if path in self._path_entity_cache:
            return self._path_entity_cache[path]

        # Import here to avoid circular import
        from .entity import Entity

        # Load the DAT and create an Entity wrapper
        dat = Dat.load(path)
        # Use the last path component as the entity name
        name = Path(path).name
        entity = Entity(name, dat=dat)

        self._path_entity_cache[path] = entity
        return entity

    def ref(
        self, entity: Entity, prefer_short: bool = True, absolute: bool = False
    ) -> str:
        """Get reference string for entity.

        Args:
            entity: Entity to get reference for
            prefer_short: If True, uses shortest matching prefix (ignored if absolute)
            absolute: If True, returns absolute format </dat/path.entity.path>

        Returns:
            String in PREFIX:path format (e.g., "W:cytoplasm.glucose")
            or absolute format (e.g., "</runs/exp1.cytoplasm.glucose>")

        Example:
            io.ref(glucose)                # -> "W:cytoplasm.glucose"
            io.ref(glucose, absolute=True) # -> "</runs/exp1.cytoplasm.glucose>"
        """
        if absolute:
            return self._absolute_ref(entity)

        # Find which prefixes match this entity's ancestry
        matches: list[tuple[str, str]] = []  # (prefix, remaining_path)

        # Check user-bound prefixes
        for prefix, target in self._prefixes.items():
            resolved = self.resolve_prefix(prefix)
            path = self._relative_path(entity, resolved)
            if path is not None:
                matches.append((prefix, path))

        # Always check D: prefix (data root) as fallback
        d_path = self._relative_path(entity, self._data_root)
        if d_path is not None:
            matches.append(("D", d_path))

        if not matches:
            # No prefix matches, use full name
            return entity.full_name

        if prefer_short:
            # Sort by path length (shortest first)
            matches.sort(key=lambda x: len(x[1]))

        prefix, path = matches[0]
        if path:
            return f"{prefix}:{path}"
        return f"{prefix}:"

    def _absolute_ref(self, entity: Entity) -> str:
        """Get absolute reference string for entity.

        Format: </dat/path.entity.path>
        - dat/path is the filesystem path to the DAT
        - entity.path is the dotted path from DAT root to entity

        Example: </runs/exp1.cytoplasm.glucose>
        """
        # Get the root entity and its DAT (both O(1))
        root = entity.root()
        dat = root.dat()
        if dat is None:
            raise ValueError(
                f"Entity {entity.local_name!r} has no DAT anchor for absolute ref"
            )

        dat_path = dat.get_path_name()

        # Build entity path from root to this entity
        entity_parts: list[str] = []
        current: Optional[Entity] = entity

        while current is not None and current is not root:
            entity_parts.append(current.local_name)
            current = current.parent

        entity_parts.reverse()
        entity_path = ".".join(entity_parts)

        if entity_path:
            return f"</{dat_path}.{entity_path}>"
        return f"</{dat_path}>"

    def _relative_path(self, entity: Entity, ancestor: Entity) -> Optional[str]:
        """Compute relative path from ancestor to entity.

        Returns None if ancestor is not in entity's ancestry.
        Returns "" if entity is the ancestor.
        Returns dotted path otherwise.

        Special handling for _RootEntity: matches based on full_name prefix.
        """
        if entity is ancestor:
            return ""

        # Special handling for _RootEntity (virtual data root)
        if isinstance(ancestor, _RootEntity):
            try:
                entity_path = entity.full_name
                root_path = ancestor.full_name
                if entity_path.startswith(root_path):
                    # Strip root path and leading separator
                    relative = entity_path[len(root_path):].lstrip("/")
                    # Convert slashes to dots for consistency
                    return relative.replace("/", ".")
            except (ValueError, AttributeError):
                pass
            return None

        # Walk up from entity, building path segments
        path_parts: list[str] = []
        current: Optional[Entity] = entity

        while current is not None:
            if current is ancestor:
                # Found the ancestor, return path
                path_parts.reverse()
                return ".".join(path_parts)
            path_parts.append(current.local_name)
            current = current.parent

        # Ancestor not found in entity's ancestry
        return None

    def lookup(self, string: str) -> Entity:
        """Look up entity by reference string.

        Supports two formats:
        - PREFIX:path (e.g., "W:cytoplasm.glucose") - prefix-relative
        - </dat/path.entity.path> (e.g., "</runs/exp1.cytoplasm>") - absolute

        For absolute format, loads the DAT if not already loaded.

        Args:
            string: Reference string in either format

        Returns:
            The entity at the specified path

        Raises:
            ValueError: If string format is invalid
            KeyError: If prefix is not bound or path not found

        Example:
            io.lookup("W:cytoplasm.glucose")       # prefix-relative
            io.lookup("</runs/exp1.cytoplasm>")   # absolute
        """
        # Check for absolute format: </dat/path.entity.path>
        if string.startswith("</") and string.endswith(">"):
            return self._absolute_lookup(string)

        # Prefix-relative format: PREFIX:path
        if ":" not in string:
            raise ValueError(
                f"Invalid entity reference {string!r}: missing prefix separator ':'"
            )

        prefix, path = string.split(":", 1)

        if not prefix:
            raise ValueError(f"Invalid entity reference {string!r}: empty prefix")

        target = self.resolve_prefix(prefix)

        if not path:
            return target

        return self._walk_path(target, path)

    def _absolute_lookup(self, string: str) -> Entity:
        """Look up entity by absolute reference.

        Format: </dat/path.entity.path>
        - dat/path is the filesystem path to the DAT
        - entity.path is the dotted path from DAT root to entity

        Loads the DAT if not already cached.
        """
        # Strip </ and >
        inner = string[2:-1]

        # Find first dot to split DAT path from entity path
        dot_idx = inner.find(".")
        if dot_idx == -1:
            # No entity path, just DAT path
            dat_path = inner
            entity_path = ""
        else:
            dat_path = inner[:dot_idx]
            entity_path = inner[dot_idx + 1:]

        # Load or retrieve cached DAT entity
        root_entity = self._load_dat_entity(dat_path)

        if not entity_path:
            return root_entity

        return self._walk_path(root_entity, entity_path)

    def _load_dat_entity(self, dat_path: str) -> Entity:
        """Load a DAT and return its root entity, with caching.

        If the DAT has already been loaded, returns the cached entity.
        Otherwise loads from filesystem and caches.

        If entities.yaml exists in the DAT folder, loads the entity tree
        from it, recursively creating children.
        """
        # Check cache first
        if dat_path in self._dat_entity_cache:
            return self._dat_entity_cache[dat_path]

        # Import here to avoid circular import
        from .entity import Entity

        # Load the DAT
        dat = Dat.load(dat_path)

        # Check for entities.yaml
        dat_folder = Path(dat.get_path())
        entities_file = dat_folder / "entities.yaml"

        if entities_file.exists():
            import yaml

            with open(entities_file) as f:
                entity_data = yaml.safe_load(f)

            # Create root entity from loaded data
            entity = self._create_entity_from_dict(entity_data, dat=dat)
        else:
            # Create root entity from DAT
            # Use the last path component as the entity name
            name = Path(dat_path).name
            entity = Entity(name, dat=dat)

        # Cache and return
        self._dat_entity_cache[dat_path] = entity
        return entity

    def _create_entity_from_dict(
        self,
        data: Dict[str, Any],
        *,
        dat: Optional[Dat] = None,
        parent: Optional[Entity] = None,
    ) -> Entity:
        """Create an entity and, recursively, its children from a dict.

        Uses type dispatch: the 'head' field is looked up in the entity
        registry via ``get_entity_class`` to pick the class to instantiate.
        A head that is not registered falls back to the base Entity.

        Args:
            data: Dict with optional keys 'head' (default "Entity"), 'name'
                (default "unnamed"), 'description' (default "") and 'args'.
                'args' maps child keys to either an absolute reference
                string starting with "</" or an inline dict of this same
                shape. A missing or null 'args' means no children.
            dat: DAT anchor (expected when no parent is given)
            parent: Parent entity (expected when no dat is given)

        Returns:
            The created entity (may be a subclass based on 'head' field)
        """
        from .entity import Entity, get_entity_class

        # Get the entity class from head field (default to Entity)
        head_name = data.get("head", "Entity")
        try:
            entity_cls = get_entity_class(head_name)
        except KeyError:
            # Unknown head - fall back to base Entity
            entity_cls = Entity

        name = data.get("name", "unnamed")
        description = data.get("description", "")

        # Create entity using the resolved class
        entity = entity_cls(name, parent=parent, dat=dat, description=description)

        # An `args:` key with no value loads from YAML as None; treat it like
        # an absent key instead of failing on None.items().
        children_spec = data.get("args") or {}

        # Only the values are consulted; the mapping keys are not used here.
        for child_data in children_spec.values():
            if isinstance(child_data, str) and child_data.startswith("</"):
                # Absolute ref - load from another DAT
                child = self._absolute_lookup(child_data)
                # Reparent to this entity
                child.set_parent(entity)
            elif isinstance(child_data, dict):
                # Inline child definition
                self._create_entity_from_dict(child_data, parent=entity)
            # else: skip invalid entries

        return entity

    def _walk_path(self, entity: Entity, path: str) -> Entity:
        """Walk down a dotted path from an entity.

        Args:
            entity: Starting entity
            path: Dotted path like "compartment.glucose"

        Returns:
            The entity at the given path

        Raises:
            KeyError: If path not found
        """
        if not path:
            return entity

        parts = path.split(".", 1)
        name = parts[0]

        children = entity.children
        if name not in children:
            raise KeyError(f"No child named {name!r} in {entity.local_name!r}")

        child = children[name]
        if len(parts) == 1:
            return child
        return self._walk_path(child, parts[1])

    def resolve_refs(self, obj: Any) -> Any:
        """Recursively replace <PREFIX:path> strings with Entity objects.

        Walks a data structure (dict, list, or scalar) and replaces any
        strings matching the <PREFIX:path> pattern with the corresponding
        Entity objects.

        Args:
            obj: Data structure to process (dict, list, or scalar)

        Returns:
            New structure with entity references resolved

        Example:
            data = yaml.safe_load(file)
            data = io.resolve_refs(data)  # <W:glucose> → Entity
        """
        if isinstance(obj, str):
            if obj.startswith("<") and obj.endswith(">") and len(obj) > 2:
                return self.lookup(obj[1:-1])  # strip < >
            return obj
        elif isinstance(obj, dict):
            return {k: self.resolve_refs(v) for k, v in obj.items()}
        elif isinstance(obj, list):
            return [self.resolve_refs(item) for item in obj]
        else:
            return obj

    def insert_refs(self, obj: Any) -> Any:
        """Recursively replace Entity objects with <PREFIX:path> strings.

        Each Entity found in the structure becomes ``self.ref(entity)``
        wrapped in angle brackets. Dicts (values only) and lists are rebuilt
        with their contents processed; anything else is returned unchanged.

        Args:
            obj: Data structure to process (dict, list, or scalar)

        Returns:
            New structure with entities replaced by reference strings

        Example:
            output = io.insert_refs(data)  # Entity → <W:glucose>
            yaml.dump(output, file)
        """
        # Import here to avoid circular import
        from .entity import Entity

        if isinstance(obj, Entity):
            reference = self.ref(obj)
            return f"<{reference}>"
        if isinstance(obj, dict):
            return {key: self.insert_refs(value) for key, value in obj.items()}
        if isinstance(obj, list):
            return [self.insert_refs(element) for element in obj]
        return obj

    def load(self, path: str | Path) -> Dat:
        """Load the Dat found at a path.

        The path is converted to ``str`` and handed to ``Dat.load``.

        Args:
            path: Path relative to data root, or absolute path

        Returns:
            The loaded Dat
        """
        loaded = Dat.load(str(path))
        return loaded

    def save(self, obj: Any, path: str | Path) -> Dat:
        """Save an object as a Dat.

        A Dat instance is saved in place through its own ``save()`` and
        returned; ``path`` is not used in that case. Any other object
        becomes the spec of a new Dat created at ``path``.

        Args:
            obj: Object to save. If dict, uses as spec. Otherwise wraps in {"value": obj}.
            path: Path relative to Dat.manager.sync_folder

        Returns:
            The saved Dat (the one passed in, or the newly created one)
        """
        if not isinstance(obj, Dat):
            # Dat.create handles path resolution via Dat.manager.sync_folder
            spec = obj if isinstance(obj, dict) else {"value": obj}
            return Dat.create(path=str(path), spec=spec)

        obj.save()
        return obj

orphan_root property

Lazy-initialized root entity for orphaned entities.

Detached entities are re-parented here instead of becoming invalid. The ORPHAN: prefix is automatically bound to this root.

prefixes property

Current prefix bindings (read-only copy).

__init__()

Initialize IO.

Source code in src/alienbio/infra/io.py
def __init__(self) -> None:
    """Create an IO with no prefix bindings, empty caches and no roots.

    The data root, orphan DAT and orphan root all start as None.
    """
    # Root objects, all unset at construction time.
    self._root_entity: Optional[_RootEntity] = None
    self._orphan_root: Optional[Entity] = None
    self._orphan_dat: Optional[_OrphanDat] = None

    # Entity caches, both keyed by path string.
    self._dat_entity_cache: Dict[str, Entity] = dict()  # DAT path -> root entity
    self._path_entity_cache: Dict[str, Entity] = dict()

    # Prefix bindings: prefix -> Entity or DAT path string.
    self._prefixes: Dict[str, Entity | str] = dict()

bind_prefix(prefix, target)

Bind a prefix to an entity or path string.

Parameters:

Name Type Description Default
prefix str

Short prefix string (e.g., "R", "W", "M")

required
target Entity | str

Entity to bind, or path string to DAT location

required
Example

io.bind_prefix("W", world_entity) # bind to Entity io.bind_prefix("R", "runs/experiment1") # bind to path

Source code in src/alienbio/infra/io.py
def bind_prefix(self, prefix: str, target: Entity | str) -> None:
    """Associate a prefix with an entity or a DAT path string.

    An existing binding for the same prefix is replaced.

    Args:
        prefix: Short prefix string (e.g., "R", "W", "M")
        target: Entity to bind, or path string to DAT location

    Example:
        io.bind_prefix("W", world_entity)       # bind to Entity
        io.bind_prefix("R", "runs/experiment1") # bind to path
    """
    self._prefixes.update({prefix: target})

unbind_prefix(prefix)

Remove a prefix binding.

Parameters:

Name Type Description Default
prefix str

Prefix to unbind

required

Returns:

Type Description
Optional[Entity | str]

The previously bound target, or None if not bound

Source code in src/alienbio/infra/io.py
def unbind_prefix(self, prefix: str) -> Optional[Entity | str]:
    """Drop the binding for a prefix, if there is one.

    Args:
        prefix: Prefix to unbind

    Returns:
        The target that was bound, or None when the prefix was not bound
    """
    try:
        return self._prefixes.pop(prefix)
    except KeyError:
        return None

resolve_prefix(prefix)

Get the entity bound to a prefix.

If prefix is bound to a path string, loads/creates an Entity for it. The special prefix 'D' always resolves to the data root.

Parameters:

Name Type Description Default
prefix str

Prefix to resolve

required

Returns:

Type Description
Entity

The entity bound to this prefix

Raises:

Type Description
KeyError

If prefix is not bound

Source code in src/alienbio/infra/io.py
def resolve_prefix(self, prefix: str) -> Entity:
    """Return the entity a prefix stands for.

    The prefix 'D' is handled before the binding table is consulted and
    always yields the data root. A prefix bound to a path string is turned
    into an entity through ``_resolve_path_to_entity``; a prefix bound to an
    entity returns that entity unchanged.

    Args:
        prefix: Prefix to resolve

    Returns:
        The entity bound to this prefix

    Raises:
        KeyError: If prefix is not bound
    """
    if prefix == "D":
        return self._data_root

    bindings = self._prefixes
    if prefix not in bindings:
        raise KeyError(f"Prefix {prefix!r} is not bound")

    bound = bindings[prefix]
    # Path strings are resolved lazily, at lookup time.
    return self._resolve_path_to_entity(bound) if isinstance(bound, str) else bound

ref(entity, prefer_short=True, absolute=False)

Get reference string for entity.

Parameters:

Name Type Description Default
entity Entity

Entity to get reference for

required
prefer_short bool

If True, uses shortest matching prefix (ignored if absolute)

True
absolute bool

If True, returns absolute format `</dat/path.entity.path>`

False

Returns:

Type Description
str

String in PREFIX:path format (e.g., "W:cytoplasm.glucose")

str

or absolute format (e.g., "`</runs/exp1.cytoplasm.glucose>`")

Example

io.ref(glucose) # -> "W:cytoplasm.glucose" io.ref(glucose, absolute=True) # -> "`</runs/exp1.cytoplasm.glucose>`"

Source code in src/alienbio/infra/io.py
def ref(
    self, entity: Entity, prefer_short: bool = True, absolute: bool = False
) -> str:
    """Produce a reference string for an entity.

    Every bound prefix whose resolved entity is an ancestor of (or is) the
    entity is a candidate, followed by the built-in D: data root. With
    ``prefer_short`` the candidate with the shortest remaining path wins
    (earliest candidate on ties); otherwise the first candidate is used.
    If nothing matches, ``entity.full_name`` is returned.

    Args:
        entity: Entity to get reference for
        prefer_short: If True, uses shortest matching prefix (ignored if absolute)
        absolute: If True, returns absolute format </dat/path.entity.path>

    Returns:
        String in PREFIX:path format (e.g., "W:cytoplasm.glucose")
        or absolute format (e.g., "</runs/exp1.cytoplasm.glucose>")

    Example:
        io.ref(glucose)                # -> "W:cytoplasm.glucose"
        io.ref(glucose, absolute=True) # -> "</runs/exp1.cytoplasm.glucose>"
    """
    if absolute:
        return self._absolute_ref(entity)

    # (prefix, remaining_path) pairs, in binding order.
    candidates: list[tuple[str, str]] = []
    for name in self._prefixes:
        remainder = self._relative_path(entity, self.resolve_prefix(name))
        if remainder is not None:
            candidates.append((name, remainder))

    # The data root is always tried last, as the fallback prefix.
    from_data_root = self._relative_path(entity, self._data_root)
    if from_data_root is not None:
        candidates.append(("D", from_data_root))

    if not candidates:
        # No prefix matches, use full name
        return entity.full_name

    if prefer_short:
        # min() keeps the earliest candidate among equally short paths.
        chosen_prefix, chosen_path = min(candidates, key=lambda pair: len(pair[1]))
    else:
        chosen_prefix, chosen_path = candidates[0]

    return f"{chosen_prefix}:{chosen_path}"

lookup(string)

Look up entity by reference string.

Supports two formats: - PREFIX:path (e.g., "W:cytoplasm.glucose") - prefix-relative - `</dat/path.entity.path>` (e.g., "`</runs/exp1.cytoplasm>`") - absolute

For absolute format, loads the DAT if not already loaded.

Parameters:

Name Type Description Default
string str

Reference string in either format

required

Returns:

Type Description
Entity

The entity at the specified path

Raises:

Type Description
ValueError

If string format is invalid

KeyError

If prefix is not bound or path not found

Example

io.lookup("W:cytoplasm.glucose") # prefix-relative io.lookup("`</runs/exp1.cytoplasm>`") # absolute

Source code in src/alienbio/infra/io.py
def lookup(self, string: str) -> Entity:
    """Find the entity a reference string points at.

    Supports two formats:
    - PREFIX:path (e.g., "W:cytoplasm.glucose") - prefix-relative
    - </dat/path.entity.path> (e.g., "</runs/exp1.cytoplasm>") - absolute

    Absolute references are handed to ``_absolute_lookup``. For the prefix
    form the string is split at the first ':'; an empty path returns the
    prefix's entity itself.

    Args:
        string: Reference string in either format

    Returns:
        The entity at the specified path

    Raises:
        ValueError: If string format is invalid
        KeyError: If prefix is not bound or path not found

    Example:
        io.lookup("W:cytoplasm.glucose")       # prefix-relative
        io.lookup("</runs/exp1.cytoplasm>")   # absolute
    """
    if string.startswith("</") and string.endswith(">"):
        return self._absolute_lookup(string)

    prefix, separator, path = string.partition(":")

    if not separator:
        raise ValueError(
            f"Invalid entity reference {string!r}: missing prefix separator ':'"
        )
    if not prefix:
        raise ValueError(f"Invalid entity reference {string!r}: empty prefix")

    start = self.resolve_prefix(prefix)
    return self._walk_path(start, path) if path else start

resolve_refs(obj)

Recursively replace `<PREFIX:path>` strings with Entity objects.

Walks a data structure (dict, list, or scalar) and replaces any strings matching the `<PREFIX:path>` pattern with the corresponding Entity objects.

Parameters:

Name Type Description Default
obj Any

Data structure to process (dict, list, or scalar)

required

Returns:

Type Description
Any

New structure with entity references resolved

Example

data = yaml.safe_load(file) data = io.resolve_refs(data) # `<W:glucose>` → Entity

Source code in src/alienbio/infra/io.py
def resolve_refs(self, obj: Any) -> Any:
    """Recursively replace reference strings with Entity objects.

    Walks a data structure (dict, list, or scalar) and replaces any string
    that is a reference with the entity ``lookup`` returns for it. Two
    spellings are recognized:

    - <PREFIX:path>: the angle brackets are stripped and the inner
      PREFIX:path is looked up.
    - </dat/path.entity.path>: an absolute reference, passed to ``lookup``
      unchanged because its brackets are part of the format.

    Other strings and non-container values are returned as-is. Dict keys are
    never resolved, only values.

    Args:
        obj: Data structure to process (dict, list, or scalar)

    Returns:
        New structure with entity references resolved

    Raises:
        ValueError: If a bracketed string is not a valid reference
        KeyError: If a prefix is not bound or a path is not found

    Example:
        data = yaml.safe_load(file)
        data = io.resolve_refs(data)  # <W:glucose> → Entity
    """
    if isinstance(obj, str):
        closed = obj.endswith(">")
        if closed and obj.startswith("</") and len(obj) > 3:
            # Absolute form: stripping the brackets would leave a string
            # lookup() rejects for lacking a ':' separator.
            return self.lookup(obj)
        if closed and obj.startswith("<") and len(obj) > 2:
            return self.lookup(obj[1:-1])  # strip < >
        return obj
    if isinstance(obj, dict):
        return {k: self.resolve_refs(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [self.resolve_refs(item) for item in obj]
    return obj

insert_refs(obj)

Recursively replace Entity objects with `<PREFIX:path>` strings.

Walks a data structure (dict, list, or scalar) and replaces any Entity objects with their `<PREFIX:path>` string representation.

Parameters:

Name Type Description Default
obj Any

Data structure to process (dict, list, or scalar)

required

Returns:

Type Description
Any

New structure with entities replaced by reference strings

Example

output = io.insert_refs(data) # Entity → `<W:glucose>` yaml.dump(output, file)

Source code in src/alienbio/infra/io.py
def insert_refs(self, obj: Any) -> Any:
    """Recursively replace Entity objects with <PREFIX:path> strings.

    Each Entity found in the structure becomes ``self.ref(entity)`` wrapped
    in angle brackets. Dicts (values only) and lists are rebuilt with their
    contents processed; anything else is returned unchanged.

    Args:
        obj: Data structure to process (dict, list, or scalar)

    Returns:
        New structure with entities replaced by reference strings

    Example:
        output = io.insert_refs(data)  # Entity → <W:glucose>
        yaml.dump(output, file)
    """
    # Import here to avoid circular import
    from .entity import Entity

    if isinstance(obj, Entity):
        reference = self.ref(obj)
        return f"<{reference}>"
    if isinstance(obj, dict):
        return {key: self.insert_refs(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [self.insert_refs(element) for element in obj]
    return obj

load(path)

Load a Dat from data path.

Parameters:

Name Type Description Default
path str | Path

Path relative to data root, or absolute path

required

Returns:

Type Description
Dat

The loaded Dat

Source code in src/alienbio/infra/io.py
def load(self, path: str | Path) -> Dat:
    """Load the Dat found at a path.

    The path is converted to ``str`` and handed to ``Dat.load``.

    Args:
        path: Path relative to data root, or absolute path

    Returns:
        The loaded Dat
    """
    loaded = Dat.load(str(path))
    return loaded

save(obj, path)

Save object as Dat to data path.

Parameters:

Name Type Description Default
obj Any

Object to save. If dict, uses as spec. Otherwise wraps in {"value": obj}.

required
path str | Path

Path relative to Dat.manager.sync_folder

required

Returns:

Type Description
Dat

The created Dat

Source code in src/alienbio/infra/io.py
def save(self, obj: Any, path: str | Path) -> Dat:
    """Save an object as a Dat.

    A Dat instance is saved in place through its own ``save()`` and
    returned; ``path`` is not used in that case. Any other object becomes
    the spec of a new Dat created at ``path``.

    Args:
        obj: Object to save. If dict, uses as spec. Otherwise wraps in {"value": obj}.
        path: Path relative to Dat.manager.sync_folder

    Returns:
        The saved Dat (the one passed in, or the newly created one)
    """
    if not isinstance(obj, Dat):
        # Dat.create handles path resolution via Dat.manager.sync_folder
        spec = obj if isinstance(obj, dict) else {"value": obj}
        return Dat.create(path=str(path), spec=spec)

    obj.save()
    return obj