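# Classify a single file (e.g. a Dockerfile) as "good" or "bad" using the
# binary sequence-classification model stored under models/binary/final.
#
# Usage: python3 12.1_predict_binary_file.py /path/to/Dockerfile [--debug]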
import sys
from pathlib import Path

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Directory the tokenizer and the classification model are loaded from.
MODEL_PATH = Path("models/binary/final")

# Token budget handed to the tokenizer: longer inputs are truncated to this
# length and shorter ones are padded up to it.
MAX_LENGTH = 512
def load_model_and_tokenizer(model_path=None):
    """Load the tokenizer and the sequence-classification model.

    Args:
        model_path: Location passed to ``from_pretrained`` for both the
            tokenizer and the model. When omitted, the module-level
            ``MODEL_PATH`` is used.

    Returns:
        A ``(tokenizer, model)`` tuple. The model has been switched to
        evaluation mode.
    """
    if model_path is None:
        model_path = MODEL_PATH

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    # This script only runs inference, so put the model in evaluation mode.
    model.eval()
    return tokenizer, model
def predict(filepath: Path, tokenizer, model, debug: bool = False, max_length=None):
    """Classify one file as "good" or "bad" and print the result.

    The file is read as UTF-8, tokenized (truncated / padded to
    ``max_length``), run through ``model`` without gradient tracking, and
    the softmax of the logits is printed. Class index 0 is reported as
    "good"; any other predicted index is reported as "bad".

    Args:
        filepath: File whose text is classified.
        tokenizer: Callable tokenizer; called with the text and must return
            a mapping usable as ``model(**inputs)`` that contains
            ``input_ids``.
        model: Callable model whose output exposes ``.logits``.
        debug: When true, also print the start of the raw text, the token
            count, the raw logits and the softmax values.
        max_length: Token budget for truncation/padding. Defaults to the
            module-level ``MAX_LENGTH`` when omitted.

    Returns:
        None. All results are written to standard output.
    """
    if max_length is None:
        max_length = MAX_LENGTH

    text = filepath.read_text(encoding="utf-8")

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )

    with torch.no_grad():
        logits = model(**inputs).logits
        # Batch of one: squeeze() drops the batch axis so probs[i] is class i.
        probs = torch.nn.functional.softmax(logits, dim=1).squeeze()
        pred = torch.argmax(probs).item()

    label = "good" if pred == 0 else "bad"
    p_good = probs[0].item()
    p_bad = probs[1].item()

    print(f"\n🧪 Predykcja dla pliku: {filepath.name}")
    print(f"📄 Długość: {len(text.splitlines())} linii")
    print(f"📊 Wynik: {label.upper()}")
    print(f"🔢 Prawdopodobieństwo: good={p_good:.4f}, bad={p_bad:.4f}")

    if debug:
        # The input is padded to max_length, so len(input_ids) would always
        # equal max_length. Count only positions the attention mask marks as
        # real tokens; fall back to the padded length if no mask is returned.
        if "attention_mask" in inputs:
            token_count = int(inputs["attention_mask"][0].sum())
        else:
            token_count = len(inputs["input_ids"][0])

        print("\n🛠 DEBUG INFO:")
        print(f"✏️ Surowy tekst (pierwsze 300 znaków):\n{text[:300]}")
        print(f"🔢 Liczba tokenów: {token_count}")
        print(f"📈 Logity: {logits.tolist()[0]}")
        print(f"📊 Softmax: good={p_good:.5f}, bad={p_bad:.5f}")
def main():
    """Command-line entry point: classify the file named on the command line.

    Reads ``sys.argv``. The first argument that is not ``--debug`` is taken
    as the file path, so the flag may appear before or after the path.
    Prints a usage message and exits with status 1 when no path is given,
    and prints an error and exits with status 1 when the path is not an
    existing regular file. Otherwise loads the tokenizer and model and
    prints the prediction for the file.
    """
    cli_args = sys.argv[1:]
    debug = "--debug" in cli_args
    # Separate the flag from positional arguments so "--debug" is never
    # mistaken for the file path.
    positional = [arg for arg in cli_args if arg != "--debug"]

    if not positional:
        print("❌ Użycie: python3 12.1_predict_binary_file.py /ścieżka/do/Dockerfile [--debug]")
        sys.exit(1)

    filepath = Path(positional[0])

    # is_file() also rejects directories, which exists() would let through
    # only to fail later when the path is read as text.
    if not filepath.is_file():
        print(f"❌ Plik {filepath} nie istnieje lub nie jest plikiem.")
        sys.exit(1)

    tokenizer, model = load_model_and_tokenizer()
    predict(filepath, tokenizer, model, debug=debug)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()