部署 ¶

环境需要 ¶

ubuntu 22.04
nvidia 驱动
nvidia cuda
nvidia cudnn
anaconda3
pytorch

1、配置运行环境 ¶

配置镜像源

mkdir -p ~/.pip/
tee ~/.pip/pip.conf << EOF
[global]
index-url = https://pypi.tuna.tsinghua.edu.cn/simple
#index-url = https://mirrors.aliyun.com/pypi/simple
[install]
trusted-host = pypi.tuna.tsinghua.edu.cn
#trusted-host=mirrors.aliyun.com
EOF

创建虚拟环境

# 新建chatglm环境
conda create -n chatglm python=3.8

# 激活chatglm环境
conda activate chatglm

# 下载pytorch
pip3 install torch torchvision torchaudio --index-url https://pypi.tuna.tsinghua.edu.cn/simple
# 或者，官网镜像下载
# pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

验证

进入python

python

验证

python

import torch
print(torch.__version__)
print(torch.version.cuda)
print(torch.cuda.is_available())

退出 python

exit()

2、部署（一键部署，快速模式） ¶

创建脚本


tee /root/quick_depoy_chatglm_6b.sh << EOF
#!/bin/bash

# 下载 huggingface
git clone https://gitee.com/aitool/chatglm-6b-model /root/chatglm-6b-model

# 下载模型
wgetUrl=http://minio.10yun.host:9000/ai-models/chatglm-6b-model
wget \$wgetUrl/pytorch_model-00001-of-00008.bin -O /root/chatglm-6b-model/pytorch_model-00001-of-00008.bin
wget \$wgetUrl/pytorch_model-00002-of-00008.bin -O /root/chatglm-6b-model/pytorch_model-00002-of-00008.bin
wget \$wgetUrl/pytorch_model-00003-of-00008.bin -O /root/chatglm-6b-model/pytorch_model-00003-of-00008.bin
wget \$wgetUrl/pytorch_model-00004-of-00008.bin -O /root/chatglm-6b-model/pytorch_model-00004-of-00008.bin
wget \$wgetUrl/pytorch_model-00005-of-00008.bin -O /root/chatglm-6b-model/pytorch_model-00005-of-00008.bin
wget \$wgetUrl/pytorch_model-00006-of-00008.bin -O /root/chatglm-6b-model/pytorch_model-00006-of-00008.bin
wget \$wgetUrl/pytorch_model-00007-of-00008.bin -O /root/chatglm-6b-model/pytorch_model-00007-of-00008.bin
wget \$wgetUrl/pytorch_model-00008-of-00008.bin -O /root/chatglm-6b-model/pytorch_model-00008-of-00008.bin

# 克隆项目
git clone https://gitee.com/aitool/chatglm-6b.git /root/chatglm-6b

# 修改代码
sed -i 's#THUDM/chatglm-6b#/root/chatglm-6b-model#g' /root/chatglm-6b/web_demo.py
sed -i 's#THUDM/chatglm-6b#/root/chatglm-6b-model#g' /root/chatglm-6b/web_demo2.py
sed -i 's#THUDM/chatglm-6b#/root/chatglm-6b-model#g' /root/chatglm-6b/api.py
EOF

执行脚本

sh /root/quick_depoy_chatglm_6b.sh

启动脚本


# demo1 

# demo2 

# api

2、部署（手动部署） ¶

clone代码

# 官方
git clone https://github.com/THUDM/ChatGLM-6B.git

cd ChatGLM-6B

修改 requirements.txt文件，把后续所有需要的依赖都加上，下面的配置加在文件末尾即可，如果文件里已加上这3个依赖，无需再修改。

vim requirements.txt

添加如下

chardet
streamlit
streamlit-chat

安装依赖

# 需要切换到root用户，如果已经是root用户，不用切换
sudo su
# 使用默认镜像源下载会超时，这里用了清华的pip镜像源地址
pip3 install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/

# 或者，使用官方源下载
pip3 install -r requirements.txt -i https://pypi.Python.org/simple/

下载 huggingface 文件

源地址如下： https://huggingface.co/THUDM/chatglm-6b/tree/main

含参数模型（估计会慢吧）

git clone https://huggingface.co/THUDM/chatglm-6b /root/chatglm-6b-model

不含模型参数文件的话（推荐）

下载不含模型参数的文件

GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/THUDM/chatglm-6b /root/chatglm-6b-model

模型参数文件下载地址： https://cloud.tsinghua.edu.cn/d/fb9f16d6dc8f482596c2/
下载完成后，从本地电脑拷贝到服务器

# 登录 esxi服务器
sftp root@192.168.x.x
# 输入密码
xxxxx
# 拷贝

put -r /Users/xxxxx/Downloads/zzz____ai/ai-models/chatglm-6b-model/* /root/chatglm-6b-model

任选一个运行脚本

3、运行（可选） web_demo2.py ¶

修改 web_demo2.py 启动脚本

原代码

python

tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()

新代码

python

tokenizer = AutoTokenizer.from_pretrained("/root/chatglm-6b-model", trust_remote_code=True)
model = AutoModel.from_pretrained("/root/chatglm-6b-model", trust_remote_code=True).half().cuda()

启动ChatGLM

cd /root/ChatGLM-6B
python3 -m streamlit run ./web_demo2.py --server.port 27777 --server.address 0.0.0.0

看到 http://0.0.0.0:27777 字样说明成功启动了

3、运行（可选） web_demo.py ¶

修改脚本 web_demo.py

原代码

python

tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()

新代码

python

tokenizer = AutoTokenizer.from_pretrained("/root/chatglm-6b-model", trust_remote_code=True)
model = AutoModel.from_pretrained("/root/chatglm-6b-model", trust_remote_code=True).half().cuda()

增加server_name和server_port参数

demo.queue().launch(share=True,server_name="0.0.0.0",server_port=27777)

启动ChatGLM

cd /root/ChatGLM-6B
python3 web_demo.py

# 
# 1. 新增 mirror='https://mirrors.tuna.tsinghua.edu.cn/hugging-face-models', 下载模型使用清华源
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
# 2. 增加server_name和server_port参数
demo.queue().launch(share=True,server_name="0.0.0.0",server_port=9234)

启动

python

python web_demo.py

3、运行（可选） api.py ¶

修改脚本 api.py

原代码

python

tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()

新代码

python

tokenizer = AutoTokenizer.from_pretrained("/root/chatglm-6b-model", trust_remote_code=True)
model = AutoModel.from_pretrained("/root/chatglm-6b-model", trust_remote_code=True).half().cuda()

下载额外依赖

pip install fastapi uvicorn

启动

python

python api.py

参考 ¶

https://blog.csdn.net/qq_39544148/article/details/129898181
https://zhuanlan.zhihu.com/p/621216632

模型下载 https://zhuanlan.zhihu.com/p/622302825