def _test_client(port: int, print_logprobs: bool = False, test_vlm: bool = False):
    """Exercise a running deploy server through the OpenAI-style client.

    Polls until the server at ``port`` answers, then runs one non-streaming
    and one streaming chat completion, printing the responses (and logprobs
    when ``print_logprobs`` is set).

    Args:
        port: Port the deploy server is listening on.
        print_logprobs: Also pretty-print token logprobs for both requests.
        test_vlm: Send a multimodal (text + image) request instead of plain text.
    """
    import time
    import aiohttp
    from pprint import pprint
    from swift.llm import InferClient, InferRequest, RequestConfig

    infer_client = InferClient(port=port)

    # Poll until the server comes up: listing models only succeeds once the
    # deploy subprocess has finished loading the model.
    while True:
        try:
            models = infer_client.models
            print(f'models: {models}')
        except aiohttp.ClientConnectorError:
            time.sleep(5)
            continue
        break

    if test_vlm:
        query = '这是什么'
        messages = [{
            'role': 'user',
            'content': [
                {
                    'type': 'text',
                    'text': query  # reuse the query string instead of duplicating the literal
                },
                {
                    'type': 'image_url',
                    'image_url': {
                        'url': 'cat.png'
                    }
                },
            ]
        }]
    else:
        query = '123*234=?'
        messages = [{'role': 'user', 'content': query}]

    infer_request = InferRequest(messages=messages)
    request_config = RequestConfig(seed=42, max_tokens=256, temperature=0.8, logprobs=True, top_logprobs=5)

    # Non-streaming request.
    resp = infer_client.infer([infer_request], request_config=request_config)[0]
    response = resp.choices[0].message.content
    print(f'query: {query}')
    print(f'response: {response}')
    if print_logprobs:
        pprint(resp.choices[0].logprobs)

    # Streaming request with the same prompt.
    request_config = RequestConfig(
        stream=True, seed=42, max_tokens=256, temperature=0.8, top_k=20, top_p=0.8, logprobs=True, top_logprobs=5)
    gen_list = infer_client.infer([infer_request], request_config=request_config)
    print(f'query: {query}')
    print('response: ', end='')
    for chunk in gen_list[0]:
        print(chunk.choices[0].delta.content, end='', flush=True)
        if print_logprobs and chunk.choices[0].logprobs is not None:
            pprint(chunk.choices[0].logprobs)
    print()
|
def _test(infer_backend, test_vlm: bool = False):
    """Launch a deploy server in a subprocess and run the client test against it.

    Args:
        infer_backend: Inference backend to deploy ('vllm', 'lmdeploy', 'pt', ...).
        test_vlm: Deploy a vision-language model and send a multimodal request.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    from swift.llm import DeployArguments
    from swift.llm import deploy_main
    import multiprocessing
    # 'spawn' avoids fork-related CUDA initialization issues in the child.
    mp = multiprocessing.get_context('spawn')
    model = 'Qwen/Qwen2-VL-7B-Instruct' if test_vlm else 'Qwen/Qwen2-7B-Instruct'
    args = DeployArguments(model=model, infer_backend=infer_backend, verbose=False)
    process = mp.Process(target=deploy_main, args=(args, ))
    process.start()
    try:
        _test_client(args.port, True, test_vlm)
    finally:
        # Always kill the deploy server, even if the client test raises;
        # otherwise the subprocess (and its GPU memory) leaks.
        process.terminate()
|
def test_vllm_vlm():
    """Deploy a vision-language model with the vLLM backend and test it."""
    _test('vllm', test_vlm=True)
|
def test_vllm():
    """Deploy a text model with the vLLM backend and test it."""
    _test('vllm')
|
def test_lmdeploy():
    """Deploy a text model with the LMDeploy backend and test it."""
    _test('lmdeploy')
|
def test_pt():
    """Deploy a text model with the native PyTorch backend and test it."""
    _test('pt')
|
def test_vllm_origin():
    """Run the client test against an upstream vLLM OpenAI API server.

    Downloads the model via ModelScope, launches vLLM's own
    ``openai.api_server`` entry point, and runs the client against it.
    """
    import subprocess
    import sys
    from modelscope import snapshot_download
    model_dir = snapshot_download('Qwen/Qwen2-7B-Instruct')
    args = [sys.executable, '-m', 'vllm.entrypoints.openai.api_server', '--model', model_dir]
    process = subprocess.Popen(args)
    try:
        # 8000 is vLLM's default api_server port.
        _test_client(8000)
    finally:
        # Ensure the server subprocess is killed even if the client test fails.
        process.terminate()
|
if __name__ == '__main__':
    # Manual entry point: by default runs the vLLM vision-language deploy test.
    test_vllm_vlm()