"""Re-classify saved scrollbacks (refined): command-mode vs drift, and the
command/exit cross-tab. Echo/printf of prose counts as a command, not drift.

Usage:
    tar xzf scrollbacks.tar.gz -C runs/
    python classify.py runs/
"""
import re, sys, collections, pathlib

# Phrases that mark conversational prose (as opposed to a shell command).
# Matched case-insensitively and anchored on word boundaries. "apolog" and
# "confus" are stems, so they carry a \w* tail: behind a bare trailing \b they
# could never match real words such as "apologize" or "confusing".
PROSE = re.compile(r"\b(let me|let's|it seems|i think|i'll|i will|i'm|maybe|perhaps|"
                   r"seems like|looks like|sorry|frustrating|trying to|going to|that's|"
                   r"let us|we can|we could|i need|i should|actually|hmm|oops|whoops|"
                   r"apolog\w*|confus\w*|problem is|the issue)\b", re.I)
# A line that starts with echo/printf is treated as a command even when its
# argument is prose.
CMD = re.compile(r"^\s*(echo|printf)\b", re.I)

def asst(s):
    """Return the text of every ``[assistant]`` turn in scrollback *s*.

    The scrollback is cut in front of each ``[assistant]``, ``[user]`` or
    ``[system]`` tag; chunks that start with ``[assistant]`` are kept, with
    the tag and at most one following whitespace character removed.
    """
    turns = []
    # Zero-width lookahead: split *before* each role tag so the tag stays
    # at the head of its chunk.
    for chunk in re.split(r"(?=\[(?:assistant|user|system)\])", s):
        hit = re.match(r"\[assistant\]\s?(.*)", chunk, re.S)
        if hit is not None:
            turns.append(hit.group(1))
    return turns

def command_mode(s):
    """Return True when the assistant stays in command mode throughout *s*.

    A scrollback is NOT command mode when it has no assistant turns, or when
    any non-blank assistant line that does not start with echo/printf either
    contains a PROSE phrase (drift) or contains the ``alice@sek`` prompt
    marker (over-imitation).

    Both checks are made line by line. Applying the echo/printf exemption to
    a whole turn would let a multi-line turn that merely *starts* with echo
    hide a prompt emitted on a later line, and would wrongly flag a turn that
    echoes the prompt string after some other command.
    """
    a = asst(s)
    if not a:
        return False
    for turn in a:
        for line in turn.splitlines():
            ls = line.strip()
            if not ls or CMD.match(ls):
                continue                # blank, or echo/printf = a command
            if PROSE.search(ls):
                return False            # raw prose typed at the shell = drift
            if "alice@sek" in ls:
                return False            # emits the prompt = over-imitation
    return True

def exited(s):
    """Return True if some assistant line in *s* is exactly ``exit``.

    Surrounding whitespace on the line is ignored; anything else on the line
    (e.g. ``exit 0``) does not count.
    """
    for turn in asst(s):
        for line in turn.splitlines():
            if line.strip() == "exit":
                return True
    return False

# Human-readable row label for each configuration key; the keys are the
# prefixes of the run file names (``<key>_run<i>.txt``).
LABELS = {
    "c0": "C0 no-seed",
    "c1": "C1 bare",
    "c2": "C2 merged",
    "c3": "C3 failure-only",
    "c4": "C4 standalone",
    "c5": "C5 +stop",
    "c6": "C6 standalone/no-fp",
}

def main():
    """Print the command-mode / exit cross-tab for every configuration.

    Reads ``<config>_run<i>.txt`` for i in 1..5 from the directory given as
    the first command-line argument (default: current directory). Each run is
    counted in exactly one of three buckets:

    * clean       - command mode and an ``exit`` line was issued
    * cmd-no-exit - command mode but no ``exit``
    * drift       - not command mode

    Missing run files are skipped, so every ratio uses the number of runs
    actually read as its denominator rather than a fixed 5; with all five
    files present the output is unchanged.
    """
    d = pathlib.Path(sys.argv[1] if len(sys.argv) > 1 else ".")
    print(f"{'config':22s} command-mode   clean(cmd+exit)  cmd-no-exit  drift")
    for c, label in LABELS.items():
        cm = clean = working = drift = seen = 0
        for i in range(1, 6):
            f = d / f"{c}_run{i}.txt"
            if not f.is_file():
                continue
            seen += 1
            s = f.read_text(encoding="utf-8", errors="replace")
            m, e = command_mode(s), exited(s)
            cm += m
            if m and e:
                clean += 1
            elif m:
                working += 1
            else:
                drift += 1
        print(f"{label:22s} {cm}/{seen}            {clean}/{seen}              "
              f"{working}/{seen}          {drift}/{seen}")

# Produce the report only when run as a script, not when imported.
if __name__ == "__main__":
    main()
