[PATCH v3 1/4] dts: improve starting and stopping interactive shells

Juraj Linkeš juraj.linkes at pantheon.tech
Mon Jun 10 15:36:44 CEST 2024


> diff --git a/dts/framework/remote_session/interactive_shell.py b/dts/framework/remote_session/interactive_shell.py
> index 5cfe202e15..921c73d9df 100644
> --- a/dts/framework/remote_session/interactive_shell.py
> +++ b/dts/framework/remote_session/interactive_shell.py
> @@ -32,6 +34,10 @@ class InteractiveShell(ABC):
>       and collecting input until reaching a certain prompt. All interactive applications
>       will use the same SSH connection, but each will create their own channel on that
>       session.
> +
> +    Attributes:
> +        is_started: :data:`True` if the application has started successfully, :data:`False`
> +            otherwise.
>       """
>   
>       _interactive_session: SSHClient
> @@ -41,6 +47,7 @@ class InteractiveShell(ABC):
>       _logger: DTSLogger
>       _timeout: float
>       _app_args: str
> +    _finalizer: weakref.finalize
>   
>       #: Prompt to expect at the end of output when sending a command.
>       #: This is often overridden by subclasses.
> @@ -58,6 +65,8 @@ class InteractiveShell(ABC):
>       #: for DPDK on the node will be prepended to the path to the executable.
>       dpdk_app: ClassVar[bool] = False
>   
> +    is_started: bool = False

A better name would be is_alive, for consistency with SSHSession.

> +
>       def __init__(
>           self,
>           interactive_session: SSHClient,
> @@ -93,17 +102,39 @@ def __init__(
>       def _start_application(self, get_privileged_command: Callable[[str], str] | None) -> None:
>           """Starts a new interactive application based on the path to the app.
>   
> -        This method is often overridden by subclasses as their process for
> -        starting may look different.
> +        This method is often overridden by subclasses as their process for starting may look
> +        different. Initialization of the shell on the host can be retried up to 5 times. This is
> +        done because some DPDK applications need slightly more time after exiting their script to
> +        clean up EAL before others can start.
> +
> +        When the application is started we also bind a class for finalization to this instance of
> +        the shell to ensure proper cleanup of the application.

Let's also include the explanation from the commit message.

>   
>           Args:
>               get_privileged_command: A function (but could be any callable) that produces
>                   the version of the command with elevated privileges.
>           """
> +        self._finalizer = weakref.finalize(self, self._close)

This looks like exactly what we should do, but out of curiosity, do the
Paramiko docs mention how we should handle channel closing?

> +        max_retries = 5
> +        self._ssh_channel.settimeout(5)
>           start_command = f"{self.path} {self._app_args}"
>           if get_privileged_command is not None:
>               start_command = get_privileged_command(start_command)
> -        self.send_command(start_command)
> +        self.is_started = True
> +        for retry in range(max_retries):
> +            try:
> +                self.send_command(start_command)
> +                break
> +            except TimeoutError:
> +                self._logger.info(
> +                    "Interactive shell failed to start, retrying... "
> +                    f"({retry+1} out of {max_retries})"
> +                )
> +        else:
> +            self._ssh_channel.settimeout(self._timeout)
> +            self.is_started = False  # update state on failure to start
> +            raise InteractiveCommandExecutionError("Failed to start application.")
> +        self._ssh_channel.settimeout(self._timeout)
>   
>       def send_command(self, command: str, prompt: str | None = None) -> str:
>           """Send `command` and get all output before the expected ending string.


More information about the dev mailing list