-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest.py
More file actions
114 lines (89 loc) · 3.42 KB
/
test.py
File metadata and controls
114 lines (89 loc) · 3.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# This script tokenizes the WikiText-2 training split with a pre-trained model's tokenizer and builds a DataLoader of randomly sampled token windows.
from transformers import LlamaForCausalLM, AutoTokenizer, AutoModelForCausalLM, AutoConfig
from datasets import load_dataset
import torch
import inspect
from datasets import Dataset
from torch.utils.data import DataLoader
from helpers import get_wikitext2
import random
'''
def get_wikitext2(nsamples, seed, seqlen, model):
traindata = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train')
testdata = load_dataset('wikitext', 'wikitext-2-raw-v1', split='test')
print(traindata['text'])
return [], []
path = os.path.join("/Users/vaishnavip/Projects/Quantization-inference/Llama-3.2-3B", "config.json")
with open(path, "r") as f:
config_data = json.load(f)
quantization_config = {"quantization_config":{"config_groups": {
"group_0": {
"targets": ["Linear"],
"weights": {
"num_bits": 4,
"strategy": "channel",
"symmetric": True,
"type": "int",
},
}
},
"format": "pack-quantized",
"global_compression_ratio": 1.891791164021256,
"ignore": ["lm_head"],
"quant_method": "compressed-tensors",
"quantization_status": "frozen",
}
}
config_data.update(quantization_config)
print(config_data)
with open(path, "w") as config_file:
json.dump(config_data, config_file, indent=2, sort_keys=True)
'''
def load_data(model_id, nsamples=128, seqlen=2048, batch_size=128, seed=None):
    """Build a DataLoader of random fixed-length token windows from WikiText-2.

    The "wikitext-2-raw-v1" training split is joined into a single string
    (rows separated by a blank line), tokenized in one pass with the
    tokenizer loaded from ``model_id``, and ``nsamples`` windows of
    ``seqlen`` consecutive tokens are drawn at random start offsets.
    Windows are drawn independently, so they may overlap.

    Args:
        model_id: Identifier or path handed to
            ``AutoTokenizer.from_pretrained``.
        nsamples: Number of windows to draw.
        seqlen: Number of tokens in each window.
        batch_size: Batch size of the returned DataLoader.
        seed: Optional seed for reproducible sampling. When ``None`` the
            module-level ``random`` state is used.

    Returns:
        A ``DataLoader`` over a dataset with "input_ids" and
        "attention_mask" columns, formatted as torch tensors.

    Raises:
        ValueError: If the tokenized corpus does not contain more than
            ``seqlen`` tokens, so no window can be drawn.
    """
    dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")
    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
    tokenized_dataset = tokenizer("\n\n".join(dataset["text"]), return_tensors="pt")

    # The tokenizer returns a batch of one sequence; take that row once
    # instead of re-indexing it for every sample.
    token_ids = tokenized_dataset.input_ids[0]
    token_mask = tokenized_dataset.attention_mask[0]

    # Largest admissible start offset (inclusive bound for randint).
    max_start = token_ids.shape[0] - seqlen - 1
    if max_start < 0:
        raise ValueError(
            f"tokenized corpus has {token_ids.shape[0]} tokens; "
            f"more than seqlen={seqlen} are required"
        )

    # A private generator keeps seeded runs from disturbing global state;
    # without a seed, fall back to the shared module-level generator.
    rng = random if seed is None else random.Random(seed)
    starts = [rng.randint(0, max_start) for _ in range(nsamples)]

    data = {
        "input_ids": [token_ids[s:s + seqlen].tolist() for s in starts],
        "attention_mask": [token_mask[s:s + seqlen].tolist() for s in starts],
    }
    ds = Dataset.from_dict(data)
    ds.set_format(type="torch", columns=["input_ids", "attention_mask"])
    return DataLoader(ds, batch_size=batch_size)
'''
print(dataset.shape)
print('tokenized', tokenized_dataset.shape)
def collate_fn(batch):
input_ids = [item["input_ids"] for item in batch]
padded = tokenizer.pad({"input_ids": input_ids},
return_tensors="pt",
padding="longest")
return padded
dataloader = DataLoader(tokenized_dataset, collate_fn=collate_fn, batch_size=128, shuffle=True)
return dataloader
'''
# Path handed to load_data(), which passes it to AutoTokenizer.from_pretrained.
# NOTE(review): absolute, machine-specific path (presumably a local
# Llama-3.2-3B checkpoint directory) - adjust before running elsewhere.
model_id = "/Users/vaishnavip/Projects/Quantization-inference/Llama-3.2-3B"
# Executed at module level, so it also runs on import; the returned
# DataLoader is not kept.
load_data(model_id)
'''
state_dict = model.state_dict()
example_layer = "model.layers.0.self_attn.q_proj.weight"
weight = state_dict[example_layer]
print(weight.dtype)
print(torch.min(weight))
print(torch.max(weight))
names_layers = [
(name, module)
for name, module in model.named_modules()
if isinstance(module, torch.nn.Linear)
]
names_layers = sorted(names_layers, key=lambda name_layer: name_layer[0])
layers = [layer for _name, layer in names_layers]
for layer_index, layer in enumerate(layers):
# This is with hooks on for computing the Hessian
layer(**inputs)
'''