TextBandit/4bit.py at main · ChainedTears/TextBandit · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import secrets

# Set up Llama-3.2-1B (loaded with 4-bit quantization) with PyTorch
# Specify Model ID

model_id = "meta-llama/Llama-3.2-1B"

# Pick the device input tensors are moved to: Apple MPS when available, else CPU

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

# Load the tokenizer (cache_dir="." keeps downloaded files in the working directory)

tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=".")
# Set pad_token_id to eos_token_id if not set
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Configure 4-bit quantization
# NOTE(review): confirm that bitsandbytes 4-bit loading is supported on the
# device selected above (MPS/CPU) for the installed library versions.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
# Load the model

try:
    model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map="auto")
except Exception as e:
    print(f"Error loading model: {e}")
    # get_response() needs `model`; continuing would only defer the failure
    # to a NameError, so surface the real loading error instead.
    raise
else:
    print("Model loaded successfully")

def get_response(input_text, system_prompt, max_new_tokens=50):
    """Generate the model's reply to ``input_text`` under ``system_prompt``.

    The two strings are wrapped in a ``<|system|>`` / ``<|user|>`` /
    ``<|assistant|>`` text template, tokenized with the module-level
    ``tokenizer`` and completed with the module-level ``model``.

    Args:
        input_text: Text placed in the user turn.
        system_prompt: Text placed in the system turn.
        max_new_tokens: Maximum number of tokens to generate after the prompt.

    Returns:
        The decoded completion with surrounding whitespace stripped. If the
        completion itself contains ``<|assistant|>``, only the text after the
        last occurrence is returned.
    """
    print(f"Getting response for input: {input_text}")
    full_prompt = f"<|system|>\n{system_prompt}\n<|user|>\n{input_text}\n<|assistant|>"
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
    input_ids = inputs["input_ids"]

    # Create attention mask to avoid warnings (single unpadded sequence, so all ones)
    attention_mask = torch.ones_like(input_ids)

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            attention_mask=attention_mask,
            # `max_length` counts prompt tokens as well, and this prompt is
            # longer than 50 tokens, so it left no room for a completion.
            # `max_new_tokens` budgets the completion alone.
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            temperature=0.7,
            pad_token_id=tokenizer.pad_token_id,
        )

    # The returned sequence starts with the prompt tokens; decode only what
    # was generated after them instead of decoding and re-splitting the prompt.
    completion_ids = outputs[0][input_ids.shape[-1]:]
    response = tokenizer.decode(completion_ids, skip_special_tokens=True)
    # Extract just the assistant's response if the marker was generated again
    if "<|assistant|>" in response:
        response = response.split("<|assistant|>")[-1]
    return response.strip()


# Define the bandit environment
# The prompt describes this simulation to the model; the slot machine game itself is implemented as a function.

def bandit_simulation(choice):
    """Play one round on slot machine ``choice`` and report the outcome.

    A number in ``range(100)`` is drawn with ``secrets.randbelow``. Machine 1
    wins when the draw is below 30, machine 2 when it is below 65.

    Returns:
        ``"You win"`` or ``"You lose"`` for machines 1 and 2; ``None`` for any
        other ``choice``.
    """
    roll = secrets.randbelow(100)

    # Pick the win threshold for the selected machine.
    if choice == 1:
        win_below = 30
    elif choice == 2:
        win_below = 65
    else:
        return None

    return "You win" if roll < win_below else "You lose"


# Main execution loop
def main():
    """Run the two-machine bandit experiment against the language model.

    The first play is always machine 1, as the prompt states. Every later
    play asks the model for a choice via ``get_response``, passing the history
    of previous plays as input. The loop ends when 100 rounds have been
    played or when more than 80% of the plays so far were on machine 2 (the
    machine with the higher win probability in ``bandit_simulation``).

    The run stops early, after printing a message, if the model's reply is
    not exactly the integer 1 or 2.
    """
    prompt = """You are in a Casino with 2 slot machines.
    After each play, I'll describe what happened in words.
    Your goal is to walk out with as many tokens as possible.
    Each slot machine has a hidden probability of winning and losing.
    You have a choice to choose between 1 or 2.
    You start with 1, and will be told the results of your previous actions."""

    max_rounds = 100
    target_ratio = 0.8

    previous_outputs = ""
    # Each counter needs its own initial value; unpacking a single 0 into
    # several names raises TypeError.
    correct = 0   # plays on machine 2
    total = 0     # plays so far
    ratio = 0.0   # correct / total

    # Run until the round budget is spent or the model has settled on machine 2
    while total < max_rounds and ratio <= target_ratio:
        total += 1
        print(f"Iteration {total}")

        if total == 1:
            # Opening play is fixed; there is no history to show the model yet.
            choice = 1
        else:
            ai_response = get_response(previous_outputs, prompt)
            try:
                choice = int(ai_response.strip())
            except ValueError:
                print(f"Invalid response '{ai_response}'")
                return
            if choice not in (1, 2):
                print(f"Invalid choice '{choice}'")
                return

        result = bandit_simulation(choice)
        previous_outputs += f"Choice: {choice} Result: {result}\n"
        print(f"Choice: {choice} Result: {result}\n")

        # Update the statistics with the play that just happened so the loop
        # condition sees the current ratio.
        if choice == 2:
            correct += 1
        ratio = correct / total

# Start the experiment only when this file is executed as a script.
if __name__ == "__main__":
    main()