Skip to main content
This walkthrough shows how to run the LinkedIn corporate monitoring benchmark with environment pools and GEPA, using Claude Code inside managed browser pools. You will stand up a LocalAPI proxy that routes rollouts into the environment-pools browser backend, then submit a GEPA job to evolve the LinkedIn skill file.

Architecture

Prerequisites

  • Python 3.13+
  • uv installed
  • API keys:
    • SYNTH_API_KEY
    • KERNEL_API_KEY
    • ANTHROPIC_API_KEY
  • Kernel CLI (npm install -g @onkernel/cli)
  • A LinkedIn-authenticated Kernel profile

1. Install the demo

git clone https://github.com/synth-laboratories/Benchmarking.git
cd Benchmarking/demos/linkedin_bench
uv sync

2. Create a LinkedIn browser profile + pool (Kernel)

kernel profiles create --name linkedin
kernel browsers create --stealth --profile-name linkedin --save-changes
Open the live view URL, log in to LinkedIn, then delete the browser:
kernel browsers delete <session-id>
Create the pool used by the environment-pools browser backend:
kernel browser-pools create \
  --name agent-gepa \
  --profile-name linkedin \
  --stealth \
  --size 10

3. Start a LocalAPI proxy that calls Environment Pools

Create a small proxy task app that translates GEPA rollouts into environment-pools browser rollouts. Save this as env_pool_task_app.py inside Benchmarking/demos/linkedin_bench:
import os
import time
from typing import Any

from synth_ai.data.artifacts import Artifact
from synth_ai.data.enums import SuccessStatus
from synth_ai.sdk.environment_pools import AgentSpec, create_rollout, get_rollout
from synth_ai.sdk.localapi import LocalAPIConfig, create_local_api
from synth_ai.sdk.localapi._impl.contracts import RolloutRequest, RolloutResponse, TaskInfo
from synth_ai.sdk.localapi._impl.rollout_helpers import build_rollout_response

from linkedin_bench.skill_template import get_skill_content
from linkedin_bench.tasks import TASKS, get_task_by_seed

# Identifier under which this proxy registers with the LocalAPI framework.
APP_ID = "linkedin_bench_env_pools"
# Human-readable display name for the proxy task app.
APP_NAME = "LinkedIn Env Pools Proxy"


def provide_task_instances(seeds=None) -> list[TaskInfo]:
    """Return a TaskInfo entry for every benchmark task.

    The ``seeds`` argument is accepted for interface compatibility but is
    not used; all tasks are always listed.
    """
    infos: list[TaskInfo] = []
    for index, task in enumerate(TASKS):
        infos.append(TaskInfo(id=str(index), name=task.id, description=task.prompt))
    return infos


def _skill_from_overrides(request: RolloutRequest) -> str:
    """Pick the skill text supplied via file-artifact context overrides.

    Scans the request's context overrides for the first file artifact whose
    path contains "skill" (case-insensitive) and returns its content;
    otherwise falls back to the default skill template.
    """
    default_skill = get_skill_content()
    for override in (request.context_overrides or []):
        artifacts = override.file_artifacts
        if not artifacts:
            continue
        for artifact_path, artifact_content in artifacts.items():
            if "skill" in artifact_path.lower():
                return artifact_content
    return default_skill


def _poll_rollout(rollout_id: str, backend_url: str, api_key: str, timeout_sec: float) -> dict[str, Any]:
    """Poll environment pools until the rollout reaches a terminal status.

    Args:
        rollout_id: Identifier returned by ``create_rollout``.
        backend_url: Base URL of the environment-pools backend.
        api_key: Synth API key used for each status request.
        timeout_sec: How long to keep polling before giving up.

    Returns:
        The rollout status payload. If the deadline passes before a terminal
        status is observed, one final snapshot is fetched and returned as-is
        so the caller can report whatever state the rollout is in.
    """
    terminal_statuses = ("succeeded", "failed", "cancelled", "error", "completed")
    # Use a monotonic clock for the deadline: wall-clock adjustments
    # (NTP steps, manual changes) must not shorten or extend the wait.
    deadline = time.monotonic() + timeout_sec
    while time.monotonic() < deadline:
        status = get_rollout(rollout_id, backend_base=backend_url, api_key=api_key, timeout=30)
        if status.get("status") in terminal_statuses:
            return status
        time.sleep(3)
    return get_rollout(rollout_id, backend_base=backend_url, api_key=api_key, timeout=30)


async def run_rollout(request: RolloutRequest, _fastapi_request: Any) -> RolloutResponse:
    """Translate a GEPA rollout request into an environment-pools browser rollout.

    Creates a rollout against the environment-pools backend, waits for it to
    finish, and maps the final reward back into a LocalAPI RolloutResponse.

    Raises:
        ValueError: if SYNTH_API_KEY is not set.
        RuntimeError: if the backend does not return a rollout identifier.
    """
    pools_base = os.environ.get("ENV_POOLS_BASE_URL", "https://api.usesynth.ai")
    synth_key = os.environ.get("SYNTH_API_KEY", "").strip()
    if not synth_key:
        raise ValueError("SYNTH_API_KEY is required to call environment pools")

    task = get_task_by_seed(request.env.seed or 0)
    skill_text = _skill_from_overrides(request)

    browser_spec = {
        "task_prompt": task.prompt,
        "skill": skill_text,
        "skill_domain": "linkedin.com",
        "profile": os.environ.get("KERNEL_PROFILE", "linkedin"),
        "timeout_sec": task.timeout,
        "headless": True,
        "capture_screenshot": True,
        "verifier_model": "claude-sonnet-4-20250514",
        "expected": task.expected,
    }
    payload = {
        "task_ref": {"dataset": "linkedin_bench", "task_id": task.id},
        "agent": AgentSpec.claude_code().model_dump(exclude_none=True),
        "environment": {"backend": "browser"},
        "browser": browser_spec,
        "pool_tags": ["browser", "kernel"],
        "timeouts": {"agent_sec": task.timeout},
    }

    created = create_rollout(
        backend_base=pools_base,
        api_key=synth_key,
        request=payload,
        timeout=60,
    )
    rollout_id = created.get("rollout_id") or created.get("trial_id")
    if not rollout_id:
        raise RuntimeError("Environment pools did not return a rollout_id")

    # Extra headroom beyond the agent timeout covers queueing/verification time.
    final = _poll_rollout(rollout_id, pools_base, synth_key, task.timeout + 120)
    reward = float(final.get("reward_primary") or 0.0)

    summary = Artifact(
        content={
            "rollout_id": rollout_id,
            "status": final.get("status"),
            "pool_id": final.get("pool_id"),
            "reward_primary": final.get("reward_primary"),
        },
        content_type="environment_pool_rollout",
    )
    return build_rollout_response(
        request=request,
        outcome_reward=reward,
        success_status=SuccessStatus.SUCCESS if reward > 0 else SuccessStatus.FAILURE,
        status_detail=f"env pools status: {final.get('status')}",
        artifact=[summary],
    )


# Build the LocalAPI FastAPI application that GEPA calls for task listing
# and rollout execution.
app = create_local_api(
    LocalAPIConfig(
        app_id=APP_ID,
        name=APP_NAME,
        description="Proxy LinkedIn bench rollouts into environment pools.",
        provide_task_instances=provide_task_instances,
        rollout=run_rollout,
        # NOTE(review): wide-open CORS is fine for a local tunnel setup —
        # confirm before exposing this app publicly.
        cors_origins=["*"],
    )
)

if __name__ == "__main__":
    import uvicorn

    # Default port 8030 matches the tunnel/ngrok examples in this guide;
    # override with the PORT environment variable.
    port = int(os.environ.get("PORT", "8030"))
    uvicorn.run(app, host="0.0.0.0", port=port)
Start the proxy:
export SYNTH_API_KEY=sk_live_...
export KERNEL_API_KEY=sk_...
export ANTHROPIC_API_KEY=sk-ant-...
uv run python env_pool_task_app.py

4. Expose the task app (SynthTunnel or ngrok)

Expose the local proxy with SynthTunnel if it is available in your setup; otherwise fall back to ngrok:
ngrok http 8030 --url your-subdomain.ngrok-free.app
export TASK_APP_URL=https://your-subdomain.ngrok-free.app

5. Submit the GEPA job

Run this from Benchmarking/demos/linkedin_bench:
uv run python - <<'PY'
import os
import time
import tomllib
from pathlib import Path

from linkedin_bench.skill_template import get_skill_content
from synth_ai.sdk.optimization.internal.prompt_learning import PromptLearningJob

# SYNTH_API_KEY and TASK_APP_URL are required; SYNTH_BACKEND_URL is optional.
backend_url = os.environ.get("SYNTH_BACKEND_URL", "https://api.usesynth.ai")
task_app_url = os.environ["TASK_APP_URL"]
api_key = os.environ["SYNTH_API_KEY"]

# Load the GEPA config and point it at the exposed task app.
config_path = Path("linkedin_gepa.toml")
config = tomllib.loads(config_path.read_text())

config["prompt_learning"]["task_app_url"] = task_app_url

# Seed both the wildcard slot and the baseline context override with the
# current skill file, so the baseline generation evaluates the unmodified skill.
initial_skill = get_skill_content()
config["prompt_learning"]["initial_prompt"]["wildcards"]["skill_content"] = initial_skill
baseline_co = config["prompt_learning"]["gepa"]["baseline_context_override"]["file_artifacts"]
for path in list(baseline_co.keys()):
    if "skill" in path.lower():
        baseline_co[path] = initial_skill

job = PromptLearningJob.from_dict(
    config_dict=config,
    backend_url=backend_url,
    api_key=api_key,
    skip_health_check=True,
)

job_id = job.submit()
print(f"Submitted job: {job_id}")
result = job.poll_until_complete(timeout=7200.0, interval=15.0, progress=True)

print(f"Status: {result.status}")
if result.best_prompt:
    output_dir = Path("output")
    output_dir.mkdir(exist_ok=True)
    output_file = output_dir / f"optimized_skill_{time.strftime('%Y%m%d_%H%M%S')}.md"
    saved = False
    if isinstance(result.best_prompt, dict):
        # Dict payloads carry chat messages; the system message holds the skill.
        for msg in result.best_prompt.get("messages", []):
            if msg.get("role") == "system":
                output_file.write_text(msg.get("pattern", ""))
                saved = True
                break
    elif isinstance(result.best_prompt, str):
        output_file.write_text(result.best_prompt)
        saved = True
    # Bug fix: the original printed "Saved optimized skill" even when the dict
    # branch found no system message and nothing was written to disk.
    if saved:
        print(f"Saved optimized skill: {output_file}")
    else:
        print("best_prompt contained no system message; nothing was saved")
PY

Troubleshooting

  • Pool not found: make sure your environment-pools browser pool is tagged with browser and kernel or supply a pool_id in the proxy.
  • LinkedIn auth expired: re-login to the Kernel profile and recreate the browser pool.
  • Slow rollouts: reduce max_concurrent in linkedin_gepa.toml or shrink the pool size to limit parallelism.

Ready to get started?