-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathsetup_lambdalabs.py
More file actions
93 lines (75 loc) · 3.19 KB
/
setup_lambdalabs.py
File metadata and controls
93 lines (75 loc) · 3.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import requests
import paramiko
from dotenv import dotenv_values
config = dotenv_values("./.env")
api_key = config["LL_SECRET"]
ssh_key_filename = config["ssh_key_filename"]
training_data_url = config["training_data_url"]
response = requests.get('https://cloud.lambdalabs.com/api/v1/instances', auth=(api_key, ''))
response_data = response.json()
response_data = response_data["data"]
for instance in response_data:
id = instance["id"]
ip = instance["ip"]
gpu = instance["instance_type"]["name"]
jupyter_url = instance["jupyter_url"]
print(id, ip, gpu, jupyter_url)
LambdLabsServer = ip
ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect(LambdLabsServer, username="ubuntu", key_filename=ssh_key_filename)
def run_cmd(cmd):
print(cmd)
stdin, stdout, stderr = ssh.exec_command(cmd)
for line in stdout:
print(line, end='')
def run_basic_setup():
run_cmd("sudo rm /var/lib/man-db/auto-update") # https://askubuntu.com/questions/272248/processing-triggers-for-man-db
run_cmd("sudo apt-get update -y")
#run_cmd("sudo apt-get upgrade -y")
run_cmd("sudo apt-get install git-lfs -y")
run_cmd("python3 -m pip install --upgrade pip")
run_cmd("pip3 install transformers")
run_cmd("pip3 install datasets accelerate loralib peft")
run_cmd("pip3 install sentencepiece")
run_cmd("pip3 install protobuf==3.20.*")
def setup_LD_LIBRARY_PATH():
#run_cmd("find /usr/lib/ -name libcudart.so 2>/dev/null") # use this to find it
run_cmd("export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/")
run_cmd('echo "export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/">>~/.bashrc')
def replace_in_file(file, old_string, new_string):
with open(file, 'r') as f:
s = f.read()
s = s.replace(old_string, new_string)
with open(file, 'w') as f:
f.write(s)
def compile_bits_and_bytes(patch_save_pretrained=True):
run_cmd("git clone https://github.com/TimDettmers/bitsandbytes")
run_cmd("cd bitsandbytes;pip install -r requirements.txt")
run_cmd("nvcc --version")
if patch_save_pretrained:
# patch_save_pretrained begin
# there is a problem with saving the model with running out of GPU memory
# this patch fixes this problem. Credit goes to angelovAlex
sftp = ssh.open_sftp()
sftp.get('/home/ubuntu/bitsandbytes/bitsandbytes/nn/modules.py', './modules.py')
replace_in_file('./modules.py', "self.state.CxB, self.state.tile_indices", "self.state.CxB.cpu(), self.state.tile_indices.cpu()")
sftp.put('./modules.py', '/home/ubuntu/bitsandbytes/bitsandbytes/nn/modules.py')
sftp.close()
# patch_save_pretrained end
run_cmd("cd bitsandbytes;CUDA_VERSION=118 make cuda11x")
run_cmd("cd bitsandbytes;sudo python setup.py install")
def upload_training_data():
run_cmd("wget " + training_data_url)
def upload_train_py():
sftp = ssh.open_sftp()
sftp.put('train_text.py', '/home/ubuntu/train_text.py')
sftp.put('inference.py', '/home/ubuntu/inference.py')
sftp.close()
run_basic_setup()
setup_LD_LIBRARY_PATH()
compile_bits_and_bytes()
upload_training_data()
upload_train_py()
print(ip)
ssh.close()