This repository was archived by the owner on Oct 16, 2023. It is now read-only.

make server correct #29

Merged
merged 3 commits on Apr 14, 2022
17 changes: 17 additions & 0 deletions energon/cli/__init__.py
@@ -0,0 +1,17 @@
import click
import typer
from energon.cli.service import service

app = typer.Typer()

@app.callback()
def callback():
"""
Typer app, including Click subapp
"""

typer_click_object = typer.main.get_command(app)
typer_click_object.add_command(service, "service")

if __name__ == "__main__":
    typer_click_object()
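
The snippet below is a small usage sketch, not part of the PR: it drives the merged Typer/Click object through Click's test runner, assuming the energon package from this repository is importable. The "service init" subcommand and its flags come from energon/cli/service.py further down.

# Hedged usage sketch (not in this diff): exercise the merged Typer/Click
# command tree via Click's CliRunner; assumes `energon` is installed/importable.
from click.testing import CliRunner
from energon.cli import typer_click_object

runner = CliRunner()
# `service` is the Click group added above; `init` is defined in energon/cli/service.py.
result = runner.invoke(typer_click_object, ["service", "init", "--help"])
print(result.output)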
82 changes: 82 additions & 0 deletions energon/cli/service.py
@@ -0,0 +1,82 @@
import click
import torch
import energon.server as server
from multiprocessing import Process

@click.group()
def service():
    pass


@service.command()
@click.option("--model_name", default="bert_small", type=str)
@click.option("--model_type", default="bert", type=str)
@click.option("--max_batch_size", default=32, type=int)
@click.option("--tp_init_size", default=1, type=int)
@click.option("--pp_init_size", default=1, type=int)
@click.option("--host", default="127.0.0.1", type=str)
@click.option("--port", default=29400, type=int)
@click.option("--half", is_flag=True, show_default=True)
@click.option("--checkpoint", type=str)
@click.option("--server_host", default="127.0.0.1", type=str)
@click.option("--server_port", default=8005, type=int)
@click.option("--log_level", default="info", type=str)
@click.option("--backend", default="nccl", type=str)
def init(model_name,
         model_type,
         max_batch_size,
         tp_init_size,
         pp_init_size,
         host,
         port,
         half,
         checkpoint,
         server_host,
         server_port,
         log_level,
         backend):

    click.echo(f'*** Energon Init Configurations: *** \n'
               f'Model Name: {model_name} \n'
               f'Max Batch Size: {max_batch_size} \n'
               f'Tensor Parallelism Size: {tp_init_size} \n'
               f'Pipeline Parallelism Size: {pp_init_size} \n'
               f'Communication Host: {host} \n'
               f'Communication Port: {port} \n'
               f'Is Half: {half} \n'
               f'Checkpoint Path: {checkpoint} \n'
               f'Worker Server Host: {server_host} \n'
               f'Worker Server Port: {server_port} \n'
               f'Uvicorn Log Level: {log_level} \n')

    if half:
        dtype = torch.half
    else:
        dtype = torch.float

    world_size = tp_init_size * pp_init_size
    num_worker = world_size - 1

    engine_port = server_port
    worker_port = server_port + 1
    worker_rank = 1  # start from 1

    process_list = []
    for i in range(num_worker):
        # one worker process per non-zero rank; pass the user-selected backend through
        p = Process(target=server.launch_worker,
                    args=(host, port, tp_init_size, pp_init_size, backend, 1024, True,
                          worker_rank + i, worker_rank + i, server_host, worker_port + i, log_level))
        p.start()
        process_list.append(p)

    server.launch_engine(model_name,
                         model_type,
                         max_batch_size,
                         tp_init_size,
                         pp_init_size,
                         host,
                         port,
                         dtype,
                         checkpoint,
                         server_host,
                         engine_port,
                         log_level)
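
To make the rank/port arithmetic in init concrete, here is an illustrative sketch, not part of the diff, for a hypothetical tp_init_size=2, pp_init_size=2, server_port=8005 run: rank 0 stays in the current process as the engine, and each spawned worker takes the next rank and the next port.

# Illustrative sketch (not in the PR): the rank/port layout produced by `init`
# for tp_init_size=2, pp_init_size=2, server_port=8005.
tp_init_size, pp_init_size, server_port = 2, 2, 8005

world_size = tp_init_size * pp_init_size   # 4 ranks in total
num_worker = world_size - 1                # rank 0 is the engine in this process
engine_port = server_port                  # engine HTTP server stays on 8005

layout = [("engine", 0, engine_port)]
layout += [("worker", rank, server_port + rank) for rank in range(1, world_size)]
print(layout)
# [('engine', 0, 8005), ('worker', 1, 8006), ('worker', 2, 8007), ('worker', 3, 8008)]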
5 changes: 1 addition & 4 deletions energon/engine/engine.py
@@ -1,4 +1,3 @@
-import os
 import time
 import torch
 from torch.nn import Module
@@ -68,9 +67,7 @@ def __init__(self,
     def _init_dist_rpc(self):
         r'''
         Based on global_context, init the rpc connection.
-        '''
-        os.environ['MASTER_ADDR'] = self.host
-        os.environ['MASTER_PORT'] = f'{self.port}'
+        '''
         launch_from_multiprocess(tp_size = self.tp_size, pp_size = self.pp_size, rank = self.rank, local_rank = self.rank, world_size = self.global_world_size, host = self.host, port = self.port)
         rpc_backend_options=rpc.TensorPipeRpcBackendOptions(
             num_worker_threads=16)
15 changes: 14 additions & 1 deletion energon/engine/server.py
@@ -1,5 +1,6 @@
 import os
 import uvicorn
+import argparse
 from fastapi import FastAPI
 import torch.distributed.rpc as rpc
 from energon.initialize import launch_from_multiprocess
@@ -40,4 +41,16 @@ def launch_worker(host="127.0.0.1", port=8005, log_level="info"):
     global server
     config = uvicorn.Config(app, host=host, port=port, log_level=log_level)
     server = uvicorn.Server(config=config)
-    server.run()
+    server.run()
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host", type=str, default="127.0.0.1", help="Host")
+    parser.add_argument("--port", type=int, default=8005, help="Port")
+    parser.add_argument("--log_level", default="info", type=str)
+    args = parser.parse_args()
+    launch_worker(args.host, args.port, args.log_level)
+
+if __name__ == "__main__":
+    main()
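
A hedged usage sketch for the new entry point: starting a worker in its own process and tearing it down. Running the file via "python -m energon.engine.server" is an assumption about how the package is laid out; the flags themselves are the ones main() defines above.

# Hedged sketch (not in the PR): run the worker's argparse entry point in a
# subprocess; assumes the module is importable as energon.engine.server.
import subprocess
import sys

proc = subprocess.Popen([sys.executable, "-m", "energon.engine.server",
                         "--host", "127.0.0.1", "--port", "8006",
                         "--log_level", "info"])
# ... the uvicorn worker now serves on 127.0.0.1:8006 until terminated ...
proc.terminate()
proc.wait()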
4 changes: 3 additions & 1 deletion energon/initialize.py
@@ -118,7 +118,9 @@ def launch_from_multiprocess(tp_size: int = 1,
     here we provide the multiprocess launch.
     TODO: only support the single node condition now.
     """
-
+    os.environ['MASTER_ADDR'] = host
+    os.environ['MASTER_PORT'] = f'{port}'
+
     launch(local_rank=local_rank,
            rank=rank,
            world_size=world_size,
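
Moving MASTER_ADDR/MASTER_PORT into launch_from_multiprocess (and out of the engine, see the engine.py hunk above) matters if, as the need for these variables suggests, energon's launch() ultimately initializes torch.distributed with the default env:// rendezvous, which reads them at init time. A minimal, self-contained sketch of that mechanism, not energon's own code, using a CPU-only single-rank gloo group:

# Minimal sketch (not energon code): torch.distributed's default "env://"
# rendezvous picks up MASTER_ADDR/MASTER_PORT from the environment, which is
# why they must be set before the process group is created.
import os
import torch.distributed as dist

os.environ['MASTER_ADDR'] = '127.0.0.1'
os.environ['MASTER_PORT'] = '29400'

# Single-rank gloo group just to show the rendezvous; energon passes the real
# rank/world_size derived from tp_init_size * pp_init_size.
dist.init_process_group(backend='gloo', rank=0, world_size=1)
dist.destroy_process_group()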
2 changes: 2 additions & 0 deletions energon/model/__init__.py
@@ -0,0 +1,2 @@
from .bert import *
from .gpt import *
4 changes: 4 additions & 0 deletions energon/model/bert/__init__.py
@@ -0,0 +1,4 @@
from .bert import bert_small


__all__ = ['bert_small']