| | |
| |
|
| | from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer |
| | from datasets import load_from_disk |
| | from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay |
| | import numpy as np |
| | import pandas as pd |
| | import matplotlib.pyplot as plt |
| | from pathlib import Path |
| | import json |
| |
|
| | |
# Input locations: the fine-tuned checkpoint and the preprocessed dataset.
MODEL_DIR = Path("models") / "binary" / "final"
DATASET_DIR = Path("data") / "processed" / "dataset_binary"

# Evaluation artefacts are written into the model directory itself.
OUT_DIR = MODEL_DIR

# Output files produced by this script.
REPORT_CSV = OUT_DIR.joinpath("classification_report.csv")
REPORT_JSON = OUT_DIR.joinpath("metrics.json")
CONF_MATRIX_PNG = OUT_DIR.joinpath("confusion_matrix.png")
| |
|
| | |
# Load the sequence-classification model from the local checkpoint directory.
print("📂 Wczytywanie modelu...")
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)

# Look for tokenizer files next to the model. When none are present, fall back
# to the microsoft/codebert-base tokenizer and store a copy in MODEL_DIR.
# NOTE(review): the original indentation was lost; save_pretrained is assumed
# to belong to the fallback branch only — confirm.
tokenizer_files = list(MODEL_DIR.glob("tokenizer*"))
if not tokenizer_files:
    print("⚠️ Brak tokenizera w modelu — pobieram z microsoft/codebert-base")
    tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
    tokenizer.save_pretrained(MODEL_DIR)
else:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
| |
|
| | |
# Load the dataset from disk and wrap the model in a Trainer with default
# arguments; the Trainer is used here only to run prediction.
ds = load_from_disk(str(DATASET_DIR))
trainer = Trainer(model=model)

# Run the model over the "test" split and reduce the raw outputs to class ids.
print("🔍 Predykcja na zbiorze testowym...")
test_split = ds["test"]
predictions = trainer.predict(test_split)
y_true = predictions.label_ids
logits = predictions.predictions
y_pred = np.argmax(logits, axis=1)
| |
|
| | |
# Build the classification report twice from the same arguments: once as a
# dict (kept for later use) and once as text (printed to the console).
class_names = ["good", "bad"]

# Pin the label ids explicitly. Without `labels`, scikit-learn derives them
# from the data, and the call raises when only one class occurs in
# y_true/y_pred because `target_names` would no longer match in length.
# Assumes label id 0 is "good" and 1 is "bad", which is the sorted-label order
# the original `target_names` already relied on — TODO confirm.
label_ids = list(range(len(class_names)))
report_kwargs = {
    "labels": label_ids,
    "target_names": class_names,
    "zero_division": 0,  # report 0 instead of warning for undefined metrics
}

print("\n📊 Raport klasyfikacji:")
report_dict = classification_report(
    y_true, y_pred, output_dict=True, **report_kwargs
)
report_text = classification_report(y_true, y_pred, **report_kwargs)
print(report_text)
| |
|
| | |
# Persist the report: one row per report entry as CSV, plus the raw dict as
# indented JSON.
df_report = pd.DataFrame(report_dict).T
df_report.to_csv(REPORT_CSV)
REPORT_JSON.write_text(json.dumps(report_dict, indent=2))

for message in (
    f"💾 Zapisano raport CSV: {REPORT_CSV}",
    f"💾 Zapisano metryki JSON: {REPORT_JSON}",
):
    print(message)
| |
|
| | |
# Compute the confusion matrix and save it as a PNG image.
labels = ["good", "bad"]

# Pin the label ids so the matrix is always 2x2 and matches `labels`, even when
# one class is absent from both y_true and y_pred.
# Assumes label id 0 is "good" and 1 is "bad" — TODO confirm.
label_ids = list(range(len(labels)))
conf_matrix = confusion_matrix(y_true, y_pred, labels=label_ids)
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=labels)

# Create the figure explicitly and hand its axes to `disp.plot`. Called without
# `ax`, `plot` creates a new figure of its own, so a preceding
# `plt.figure(figsize=...)` would be left empty, unused and never closed.
fig, ax = plt.subplots(figsize=(5, 4))
disp.plot(ax=ax, cmap="Purples", values_format="d")
ax.set_title("🧱 Confusion Matrix – Binary Classifier")
ax.grid(False)
fig.tight_layout()
fig.savefig(CONF_MATRIX_PNG)
plt.close(fig)  # release the figure once it has been written to disk

print(f"🖼️ Zapisano confusion matrix jako PNG: {CONF_MATRIX_PNG}")
| |
|