slavashvets
diff --git a/‎README.md‎
Lines changed: 6 additions & 3 deletions b/‎README.md‎
Lines changed: 6 additions & 3 deletions
diff --git a/‎Taskfile.yaml‎
Lines changed: 58 additions & 0 deletions b/‎Taskfile.yaml‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎docs/demo.cast‎
Lines changed: 0 additions & 408 deletions b/‎docs/demo.cast‎
Lines changed: 0 additions & 408 deletions
diff --git a/‎docs/demo.svg‎
Lines changed: 1 addition & 1 deletion b/‎docs/demo.svg‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎main.py‎
Lines changed: 36 additions & 8 deletions b/‎main.py‎
Lines changed: 36 additions & 8 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 0 deletions b/‎pyproject.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎uv.lock‎
Lines changed: 36 additions & 0 deletions b/‎uv.lock‎
Lines changed: 36 additions & 0 deletions
@@ -12,14 +12,17 @@ Demonstrate how to fine-tune a Sentence-Transformers encoder with the [SetFit](h
 
 ## Quick start
 
-`uv` handles the virtual environment and dependencies automatically:
+[Taskfile](https://taskfile.dev/installation/) handles task management, and [uv](https://docs.astral.sh/uv/getting-started/installation/) automatically manages the virtual environment and dependencies (including the Python executable).
 
 ```bash
 # Train the model and save the best checkpoint under models/
-uv run main.py train
+task train
 
 # Evaluate on the held-out test set
-uv run main.py test
+task test
+
+# List all available tasks
+task --list
 ```
 
 ## Dataset
 
@@ -0,0 +1,58 @@
+version: "3"
+
+tasks:
+  # Guard task: exits with status 1 and prints the install URL when the
+  # `uv` command cannot be found. Used as a dependency of `train` and `test`.
+  check-uv:
+    cmds:
+      - |
+        if ! command -v uv &> /dev/null; then
+          echo "UV is not installed. Please install UV:"
+          echo "https://docs.astral.sh/uv/getting-started/installation/"
+          exit 1
+        fi
+    silent: true
+
+  # Guard task: exits with status 1 and prints the install URL when the
+  # `asciinema` command cannot be found. Used as a dependency of `rec`.
+  check-asciinema:
+    cmds:
+      - |
+        if ! command -v asciinema &> /dev/null; then
+          echo "Asciinema is not installed. Please install Asciinema:"
+          echo "https://docs.asciinema.org/getting-started/"
+          exit 1
+        fi
+    silent: true
+
+  # Guard task: exits with status 1 and prints the install URL when the
+  # `bunx` command (shipped with Bun) cannot be found. Used as a dependency
+  # of `rec`.
+  check-bunx:
+    cmds:
+      - |
+        if ! command -v bunx &> /dev/null; then
+          echo "Bun is not installed. Please install Bun:"
+          echo "https://bun.sh/docs/installation"
+          exit 1
+        fi
+    silent: true
+
+  # Runs the `train` command of main.py through uv, after verifying that
+  # uv is available.
+  train:
+    deps:
+      - check-uv
+    desc: Train the model using data/train.csv and data/eval.csv
+    cmds:
+      - uv run main.py train
+
+  # Runs the `test` command of main.py through uv, after verifying that
+  # uv is available.
+  test:
+    deps:
+      - check-uv
+    desc: Test the model performance using data/test.csv
+    cmds:
+      - uv run main.py test
+
+  # Regenerates docs/demo.svg in three steps: record a terminal session to
+  # docs/demo.cast, convert the cast to SVG with svg-term-cli (run via bunx),
+  # then delete the intermediate cast so only the SVG remains.
+  # NOTE(review): if the conversion step fails, docs/demo.cast is left behind;
+  # asciinema may then refuse to record over it on the next run -- confirm,
+  # and consider passing `--overwrite` to `asciinema rec`.
+  rec:
+    deps:
+      - check-asciinema
+      - check-bunx
+    desc: Record a terminal session using Asciinema and convert to SVG
+    cmds:
+      - asciinema rec -i 2 --cols 120 --rows 34 docs/demo.cast
+      - bunx svg-term-cli --in docs/demo.cast --out docs/demo.svg --window --padding 2
+      - rm docs/demo.cast
+    generates:
+      - docs/demo.svg
@@ -6,6 +6,8 @@
 import numpy as np
 import pandas as pd
 from datasets import Dataset
+from rich.console import Console
+from rich.table import Table
 from setfit import SetFitModel, Trainer, TrainingArguments
 from sklearn.metrics import accuracy_score, f1_score
 
@@ -29,6 +31,8 @@
 BASE_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
 MODEL_DIR: Path = Path("models/job_interest_classifier")
 
+# Shared rich console; test() prints its result and metric tables through it.
+console = Console()
+
 
 def compute_metrics(y_pred, y_true) -> dict[str, float]:
     """Return accuracy and F1 for SetFit trainer."""
@@ -38,11 +42,18 @@ def compute_metrics(y_pred, y_true) -> dict[str, float]:
     }
 
 
-def classify_texts(texts: pd.Series, model_dir: Path) -> list[int]:
+def classify_texts(
+    texts: pd.Series, model_dir: Path, probabilities: bool = False
+) -> list:
     """Predict binary labels for a sequence of vacancy texts."""
+    # The return value depends on `probabilities`:
+    #   False (default) -> list of int labels, one per text.
+    #   True            -> list of scores, one per text (see below).
+    # The model is loaded from `model_dir` on every call.
     model = SetFitModel.from_pretrained(model_dir)
-    raw = model.predict(texts.to_list())
-    return np.atleast_1d(raw).astype(int).tolist()
+
+    if probabilities:
+        probas = model.predict_proba(texts.to_list())
+        # Element 1 of each probability row, scaled by 100 to a percentage
+        # (presumably the positive class -- confirm against label encoding).
+        return [proba[1] * 100 for proba in probas]
+    else:
+        raw = model.predict(texts.to_list())
+        # atleast_1d keeps a single prediction from collapsing to a scalar
+        # before the conversion to a plain list of ints.
+        return np.atleast_1d(raw).astype(int).tolist()
 
 
 def train() -> dict[str, float]:
@@ -71,15 +82,32 @@ def train() -> dict[str, float]:
     return trainer.evaluate()
 
 
-def test() -> dict[str, float]:
-    """Compute and print accuracy and F1 (if labels present) for test data."""
+def test() -> None:
+    """Print per-text predictions and accuracy/F1 metrics for the test data.
+
+    Renders two rich tables on the shared console: one row per test text
+    with its score and predicted label, followed by the values returned by
+    ``compute_metrics``. ``TEST_DF`` must contain a ``label`` column, because
+    the metrics are always computed.
+    """
     df = TEST_DF
-    preds = classify_texts(df["text"], MODEL_DIR)
+    # Load the model once and reuse it for both labels and probabilities;
+    # calling classify_texts twice would run from_pretrained twice.
+    model = SetFitModel.from_pretrained(MODEL_DIR)
+    texts = df["text"].to_list()
+    preds = np.atleast_1d(model.predict(texts)).astype(int).tolist()
+    # Element 1 of each probability row, scaled by 100 to a percentage
+    # (presumably the positive class -- confirm against label encoding).
+    probas = [proba[1] * 100 for proba in model.predict_proba(texts)]
+
+    results_table = Table(title="Classification Results")
+    results_table.add_column("Index", justify="right", style="cyan")
+    results_table.add_column("Text", style="magenta")
+    results_table.add_column("Probability (%)", justify="center", style="yellow")
+    results_table.add_column("Prediction", justify="center", style="yellow")
+
+    for (idx, text), proba, pred in zip(df["text"].items(), probas, preds):
+        results_table.add_row(str(idx), text, f"{proba:.2f}%", str(pred))
+
+    console.print(results_table)
+
+    metrics = compute_metrics(preds, df["label"])
+    metrics_table = Table(title="Metrics")
+    metrics_table.add_column("Metric", style="cyan")
+    metrics_table.add_column("Value", justify="center", style="yellow")
 
-    for (idx, text), pred in zip(df["text"].items(), preds):
-        print(f"{idx}: {text} -> {pred}")
+    for metric_name, metric_value in metrics.items():
+        metrics_table.add_row(metric_name, f"{metric_value:.4f}")
 
-    return compute_metrics(preds, df["label"])
+    console.print(metrics_table)
 
 
 if __name__ == "__main__":
 
@@ -8,6 +8,7 @@ dependencies = [
     "datasets>=3.6.0",
     "fire>=0.7.0",
     "pandas>=2.2.3",
+    "rich>=14.0.0",
     "scikit-learn>=1.6.1",
     "setfit>=1.1.2",
 ]
Original file line number	Diff line number	Diff line change
`@@ -8,6 +8,7 @@ dependencies = [`
`8`	`8`	`"datasets>=3.6.0",`
`9`	`9`	`"fire>=0.7.0",`
`10`	`10`	`"pandas>=2.2.3",`
	`11`	`+ "rich>=14.0.0",`
`11`	`12`	`"scikit-learn>=1.6.1",`
`12`	`13`	`"setfit>=1.1.2",`
`13`	`14`	`]`