File size: 6,682 Bytes
6f8c8ab | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 | import abc
import asyncio
import logging
import multiprocessing
import threading
from dataclasses import dataclass
from ray.dashboard.subprocesses.message import (
ChildBoundMessage,
RequestMessage,
UnaryResponseMessage,
)
from ray.dashboard.subprocesses.utils import (
assert_not_in_asyncio_loop,
module_logging_filename,
)
from ray._private.ray_logging import setup_component_logger
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
@dataclass
class SubprocessModuleConfig:
    """
    Settings handed to a SubprocessModule.

    Pickleable.
    """

    # --- Logger settings -------------------------------------------------
    # These are consumed by the subprocess entrypoint `run_module`, which
    # passes them to the component logger setup.
    logging_level: str
    logging_format: str
    log_dir: str
    # Name of the "base" log file. The module's class name is appended to its
    # stem: e.g. with logging_filename = "dashboard.log" and a module class
    # named JobHead, the logger is set up with "dashboard-JobHead.log".
    # Rotation appends .1, .2, ... to that name.
    logging_filename: str
    logging_rotate_bytes: int
    logging_rotate_backup_count: int
class SubprocessModule(abc.ABC):
    """
    A Dashboard Head Module that runs in a subprocess. This is used with the decorators
    to define a (request -> response) endpoint, or a (request -> AsyncIterator[bytes])
    for a streaming endpoint.
    """

    def __init__(
        self,
        config: SubprocessModuleConfig,
        child_bound_queue: multiprocessing.Queue,
        parent_bound_queue: multiprocessing.Queue,
    ):
        """
        Store the configuration and the two queues used by this module.

        :param config: The SubprocessModuleConfig for this module.
        :param child_bound_queue: Queue this module reads incoming messages
            from (see `dispatch_child_bound_messages`).
        :param parent_bound_queue: Queue handed to request handlers so they
            can put their responses on it.
        """
        self._config = config
        self._child_bound_queue = child_bound_queue
        self._parent_bound_queue = parent_bound_queue

    @staticmethod
    def is_minimal_module():
        """
        Currently all SubprocessModule classes should be non-minimal.

        We require this because SubprocessModuleHandle tracks aiohttp requests and
        responses. To ease this, we can define another SubprocessModuleMinimalHandle
        that doesn't track requests and responses, but still provides Queue interface
        and health check.

        TODO(ryw): If needed, create SubprocessModuleMinimalHandle.
        """
        return False

    @abc.abstractmethod
    async def init(self):
        """
        Run the module in an asyncio loop. A head module can provide
        servicers to the server.

        Only after this method has returned will the module start receiving
        messages from the parent.
        """
        pass

    def handle_child_bound_message(
        self,
        loop: asyncio.AbstractEventLoop,
        message: ChildBoundMessage,
    ):
        """
        Handles a message from the child bound queue.

        A RequestMessage is routed to the method named `message.method_name`,
        whose coroutine is scheduled on `loop`. This call does not wait for
        the handler to finish; if the handler raises, the exception is logged.

        :param loop: The event loop the handler coroutine is scheduled on.
        :param message: The message to handle.
        :raises ValueError: If `message` is not a RequestMessage.
        :raises AttributeError: If this module has no attribute named
            `message.method_name`.
        """
        if not isinstance(message, RequestMessage):
            raise ValueError(f"Unknown message type: {type(message)}")

        # Assume module has a method_name method that has signature:
        #
        # async def my_handler(self: SubprocessModule,
        #                      message: RequestMessage,
        #                      parent_bound_queue: multiprocessing.Queue) -> None
        #
        # which comes from the decorators from MethodRouteTable.
        method_name = message.method_name
        # getattr() already binds self to method, so we don't need to pass it.
        method = getattr(self, method_name)
        future = asyncio.run_coroutine_threadsafe(
            method(message, self._parent_bound_queue), loop
        )

        def log_handler_failure(done_future):
            # Without this callback nobody ever inspects the future, so an
            # exception raised by the handler would vanish without a trace.
            if done_future.cancelled():
                return
            error = done_future.exception()
            if error is not None:
                logger.error(
                    f"Handler {method_name} raised an exception.",
                    exc_info=error,
                )

        future.add_done_callback(log_handler_failure)

    def dispatch_child_bound_messages(
        self,
        loop: asyncio.AbstractEventLoop,
    ):
        """
        Dispatch Messages to the module. This function should be run in a separate
        thread from the asyncio loop of the module.

        Loops forever: blocks on the child bound queue, then hands each message
        to `handle_child_bound_message`. A failure to handle one message is
        logged and does not stop the loop.

        :param loop: The event loop on which handlers are scheduled.
        """
        assert_not_in_asyncio_loop()
        while True:
            message = self._child_bound_queue.get()
            try:
                self.handle_child_bound_message(loop, message)
            except Exception:
                logger.exception(
                    f"Error handling child bound message {message}. This request will hang forever."
                )

    async def _internal_health_check(
        self, message: RequestMessage, parent_bound_queue: multiprocessing.Queue
    ) -> None:
        """
        Internal health check. Sends back a response to the parent queue.

        Note this is NOT registered as a route, so an external HTTP request will not
        trigger this.

        :param message: The health check request; its `request_id` is echoed
            back in the response.
        :param parent_bound_queue: The queue the response is put on.
        """
        try:
            parent_bound_queue.put(
                UnaryResponseMessage(
                    request_id=message.request_id, status=200, body=b"ok!"
                )
            )
        except Exception as e:
            # logger.exception keeps the traceback, matching how
            # dispatch_child_bound_messages reports its failures.
            logger.exception(
                f"Error sending response: {e}. This means we will never reply the parent's health check request. The parent will think the module is dead."
            )
def run_module(
    child_bound_queue: multiprocessing.Queue,
    parent_bound_queue: multiprocessing.Queue,
    cls: type[SubprocessModule],
    config: SubprocessModuleConfig,
):
    """
    Entrypoint for a subprocess module.

    Sets up the component logger from `config`, instantiates `cls`, and runs
    `module.init()` to completion on a new event loop. It then creates a
    dedicated daemon thread that listens on the child bound queue and
    dispatches messages to the module, and runs the event loop forever.
    Messages are only read AFTER the module is prepared by `module.init()`.

    The event loop is closed on every exit path, including when constructing
    or initializing the module raises.

    :param child_bound_queue: Queue the module reads incoming messages from.
    :param parent_bound_queue: Queue the module puts its responses on.
    :param cls: The SubprocessModule subclass to instantiate and run.
    :param config: Configuration passed to the module and used for logging.
    """
    module_name = cls.__name__
    logging_filename = module_logging_filename(module_name, config.logging_filename)
    setup_component_logger(
        logging_level=config.logging_level,
        logging_format=config.logging_format,
        log_dir=config.log_dir,
        filename=logging_filename,
        max_bytes=config.logging_rotate_bytes,
        backup_count=config.logging_rotate_backup_count,
    )

    assert_not_in_asyncio_loop()

    loop = asyncio.new_event_loop()
    try:
        # Construction and init live inside the try so that a failure in
        # either one still closes the loop in the finally clause below.
        module = cls(config, child_bound_queue, parent_bound_queue)
        loop.run_until_complete(module.init())

        dispatch_child_bound_messages_thread = threading.Thread(
            name=f"{module_name}-dispatch_child_bound_messages_thread",
            target=module.dispatch_child_bound_messages,
            args=(loop,),
            daemon=True,
        )
        dispatch_child_bound_messages_thread.start()

        try:
            loop.run_forever()
        except KeyboardInterrupt:
            # TODO: do graceful shutdown.
            # 1. define a stop token.
            # 2. dispatch_child_bound_messages_thread will stop listening.
            # 3. join the loop to wait for all pending tasks to finish, up until a timeout.
            # 4. close the loop and exit.
            loop.stop()
    finally:
        loop.close()
|