Combo: Alien Exam
Agents are evaluated on skinned (opaque) diagnosis tasks at multiple difficulty levels.
In [1]:
Copied!
import sys
from pathlib import Path
# Ensure alienbio is importable
_root = Path(".").resolve().parent.parent / "src"
if str(_root) not in sys.path:
sys.path.insert(0, str(_root))
_demos = Path(".").resolve().parent
if str(_demos) not in sys.path:
sys.path.insert(0, str(_demos))
%matplotlib inline
import sys
from pathlib import Path
# Ensure alienbio is importable
_root = Path(".").resolve().parent.parent / "src"
if str(_root) not in sys.path:
sys.path.insert(0, str(_root))
_demos = Path(".").resolve().parent
if str(_demos) not in sys.path:
sys.path.insert(0, str(_demos))
%matplotlib inline
In [2]:
Copied!
# Run the combined exam exactly once and keep both returned objects; the
# later cells display `fig_diff` and `fig_lead` individually.
# `_core` is a project-local module expected to resolve through the sys.path
# entries added by the setup cell.
from _core import combo_alien_exam

fig_diff, fig_lead = combo_alien_exam()
Difficulty Curves (Skinned Tasks)
In [3]:
Copied!
# Bare last expression: the notebook renders the first object returned by
# combo_alien_exam() as this cell's output.
fig_diff
Out[3]:
Leaderboard (Difficulty 3)
In [4]:
Copied!
# Bare last expression: the notebook renders the second object returned by
# combo_alien_exam() as this cell's output.
fig_lead
Out[4]: