[lldb] Use reverse connection method for lldb-server tests

This fixes an flakyness is all gdb-remote tests. These tests have been
(mildly) flaky since we started using "localhost" instead of 127.0.0.1
in the test suite. The reason is that lldb-server needs to create two
sockets (v4 and v6) to listen for localhost connections. The algorithm
it uses first tries to select a random port (bind(localhost:0)) for the
first address, and then bind the same port for the second one.

The creating of the second socket can fail as there's no guarantee that
port will be available -- it seems that the (linux) kernel tries to
choose an unused port for the first socket (I've had to create thousands
of sockets to reproduce this reliably), but this can apparantly fail
when the system is under load (and our test suite creates a _lot_ of
sockets).

The socket creationg operation is considered successful if it creates at
least one socket is created, but the test harness has no way of knowing
which one it is, so it can end up connecting to the wrong address.

I'm not aware of a way to atomically create two sockets bound to the
same port. One way to fix this would be to make lldb-server report the
address is it listening on instead of just the port. However, this would
be a breaking change and it's not clear to me that's worth it (the
algorithm works pretty well under normal circumstances).

Instead, this patch sidesteps that problem by using "reverse"
connections. This way, the test harness is responsible for creating the
listening socket so it can pass the address that it has managed to open.
It also results in much simpler code overall.

To preserve test coverage for the named pipe method, I've moved the
relevant code to a dedicated test. To avoid original problem, this test
passes raw addresses (as obtained by getaddrinfo(localhost)) instead of
"localhost".

Differential Revision: https://reviews.llvm.org/D90313
This commit is contained in:
Pavel Labath
2020-10-28 14:59:14 +01:00
parent d6d6fdb068
commit 8cc49bec2e
3 changed files with 122 additions and 212 deletions

View File

@@ -100,9 +100,6 @@ class GdbRemoteTestCaseBase(TestBase):
self.test_sequence = GdbRemoteTestSequence(self.logger)
self.set_inferior_startup_launch()
self.port = self.get_next_port()
self.named_pipe_path = None
self.named_pipe = None
self.named_pipe_fd = None
self.stub_sends_two_stop_notifications_on_kill = False
if configuration.lldb_platform_url:
if configuration.lldb_platform_url.startswith('unix-'):
@@ -154,87 +151,12 @@ class GdbRemoteTestCaseBase(TestBase):
def reset_test_sequence(self):
self.test_sequence = GdbRemoteTestSequence(self.logger)
def create_named_pipe(self):
# Create a temp dir and name for a pipe.
temp_dir = tempfile.mkdtemp()
named_pipe_path = os.path.join(temp_dir, "stub_port_number")
# Create the named pipe.
os.mkfifo(named_pipe_path)
# Open the read side of the pipe in non-blocking mode. This will
# return right away, ready or not.
named_pipe_fd = os.open(named_pipe_path, os.O_RDONLY | os.O_NONBLOCK)
# Create the file for the named pipe. Note this will follow semantics of
# a non-blocking read side of a named pipe, which has different semantics
# than a named pipe opened for read in non-blocking mode.
named_pipe = os.fdopen(named_pipe_fd, "r")
self.assertIsNotNone(named_pipe)
def shutdown_named_pipe():
# Close the pipe.
try:
named_pipe.close()
except:
print("failed to close named pipe")
None
# Delete the pipe.
try:
os.remove(named_pipe_path)
except:
print("failed to delete named pipe: {}".format(named_pipe_path))
None
# Delete the temp directory.
try:
os.rmdir(temp_dir)
except:
print(
"failed to delete temp dir: {}, directory contents: '{}'".format(
temp_dir, os.listdir(temp_dir)))
None
# Add the shutdown hook to clean up the named pipe.
self.addTearDownHook(shutdown_named_pipe)
# Clear the port so the stub selects a port number.
self.port = 0
return (named_pipe_path, named_pipe, named_pipe_fd)
def get_stub_port_from_named_socket(self):
# Wait for something to read with a max timeout.
(ready_readers, _, _) = select.select(
[self.named_pipe_fd], [], [], self.DEFAULT_TIMEOUT)
self.assertIsNotNone(
ready_readers,
"write side of pipe has not written anything - stub isn't writing to pipe.")
self.assertNotEqual(
len(ready_readers),
0,
"write side of pipe has not written anything - stub isn't writing to pipe.")
# Read the port from the named pipe.
stub_port_raw = self.named_pipe.read()
self.assertIsNotNone(stub_port_raw)
self.assertNotEqual(
len(stub_port_raw),
0,
"no content to read on pipe")
# Trim null byte, convert to int.
stub_port_raw = stub_port_raw[:-1]
stub_port = int(stub_port_raw)
self.assertTrue(stub_port > 0)
return stub_port
def init_llgs_test(self, use_named_pipe=True):
def init_llgs_test(self):
reverse_connect = True
if lldb.remote_platform:
# Remote platforms don't support named pipe based port negotiation
use_named_pipe = False
# Reverse connections may be tricky due to firewalls/NATs.
reverse_connect = False
triple = self.dbg.GetSelectedPlatform().GetTriple()
if re.match(".*-.*-windows", triple):
@@ -265,9 +187,9 @@ class GdbRemoteTestCaseBase(TestBase):
# Remove if it's there.
self.debug_monitor_exe = re.sub(r' \(deleted\)$', '', exe)
else:
# Need to figure out how to create a named pipe on Windows.
# TODO: enable this
if platform.system() == 'Windows':
use_named_pipe = False
reverse_connect = False
self.debug_monitor_exe = get_lldb_server_exe()
if not self.debug_monitor_exe:
@@ -276,18 +198,15 @@ class GdbRemoteTestCaseBase(TestBase):
self.debug_monitor_extra_args = ["gdbserver"]
self.setUpServerLogging(is_llgs=True)
if use_named_pipe:
(self.named_pipe_path, self.named_pipe,
self.named_pipe_fd) = self.create_named_pipe()
self.reverse_connect = reverse_connect
def init_debugserver_test(self, use_named_pipe=True):
def init_debugserver_test(self):
self.debug_monitor_exe = get_debugserver_exe()
if not self.debug_monitor_exe:
self.skipTest("debugserver exe not found")
self.setUpServerLogging(is_llgs=False)
if use_named_pipe:
(self.named_pipe_path, self.named_pipe,
self.named_pipe_fd) = self.create_named_pipe()
self.reverse_connect = True
# The debugserver stub has a race on handling the 'k' command, so it sends an X09 right away, then sends the real X notification
# when the process truly dies.
self.stub_sends_two_stop_notifications_on_kill = True
@@ -380,17 +299,17 @@ class GdbRemoteTestCaseBase(TestBase):
self._inferior_startup = self._STARTUP_ATTACH_MANUALLY
def get_debug_monitor_command_line_args(self, attach_pid=None):
if lldb.remote_platform:
commandline_args = self.debug_monitor_extra_args + \
["*:{}".format(self.port)]
else:
commandline_args = self.debug_monitor_extra_args + \
["localhost:{}".format(self.port)]
commandline_args = self.debug_monitor_extra_args
if attach_pid:
commandline_args += ["--attach=%d" % attach_pid]
if self.named_pipe_path:
commandline_args += ["--named-pipe", self.named_pipe_path]
if self.reverse_connect:
commandline_args += ["--reverse-connect", self.connect_address]
else:
if lldb.remote_platform:
commandline_args += ["*:{}".format(self.port)]
else:
commandline_args += ["localhost:{}".format(self.port)]
return commandline_args
def get_target_byte_order(self):
@@ -399,6 +318,17 @@ class GdbRemoteTestCaseBase(TestBase):
return target.GetByteOrder()
def launch_debug_monitor(self, attach_pid=None, logfile=None):
if self.reverse_connect:
family, type, proto, _, addr = socket.getaddrinfo("localhost", 0, proto=socket.IPPROTO_TCP)[0]
sock = socket.socket(family, type, proto)
sock.settimeout(self.DEFAULT_TIMEOUT)
sock.bind(addr)
sock.listen(1)
addr = sock.getsockname()
self.connect_address = "[{}]:{}".format(*addr)
# Create the command line.
commandline_args = self.get_debug_monitor_command_line_args(
attach_pid=attach_pid)
@@ -410,15 +340,13 @@ class GdbRemoteTestCaseBase(TestBase):
install_remote=False)
self.assertIsNotNone(server)
# If we're receiving the stub's listening port from the named pipe, do
# that here.
if self.named_pipe:
self.port = self.get_stub_port_from_named_socket()
if self.reverse_connect:
self.sock = sock.accept()[0]
return server
def connect_to_debug_monitor(self, attach_pid=None):
if self.named_pipe:
if self.reverse_connect:
# Create the stub.
server = self.launch_debug_monitor(attach_pid=attach_pid)
self.assertIsNotNone(server)
@@ -426,8 +354,6 @@ class GdbRemoteTestCaseBase(TestBase):
# Schedule debug monitor to be shut down during teardown.
logger = self.logger
# Attach to the stub and return a socket opened to it.
self.sock = self.create_socket()
return server
# We're using a random port algorithm to try not to collide with other ports,

View File

@@ -0,0 +1,89 @@
from __future__ import print_function
import gdbremote_testcase
import select
import socket
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
class TestGdbRemoteConnection(gdbremote_testcase.GdbRemoteTestCaseBase):
mydir = TestBase.compute_mydir(__file__)
@debugserver_test
# <rdar://problem/34539270> lldb-server tests not updated to work on ios etc yet
@skipIfDarwinEmbedded
def test_reverse_connect_debugserver(self):
self.init_debugserver_test()
self._reverse_connect()
@llgs_test
@skipIfRemote # reverse connect is not a supported use case for now
def test_reverse_connect_llgs(self):
self.init_llgs_test()
self._reverse_connect()
def _reverse_connect(self):
# Reverse connect is the default connection method.
self.connect_to_debug_monitor()
# Verify we can do the handshake. If that works, we'll call it good.
self.do_handshake(self.sock)
@debugserver_test
@skipIfRemote
def test_named_pipe_debugserver(self):
self.init_debugserver_test()
self._named_pipe()
@llgs_test
@skipIfRemote
@skipIfWindows
def test_named_pipe_llgs(self):
self.init_llgs_test()
self._named_pipe()
def _named_pipe(self):
family, type, proto, _, addr = socket.getaddrinfo(
self.stub_hostname, 0, proto=socket.IPPROTO_TCP)[0]
self.sock = socket.socket(family, type, proto)
self.sock.settimeout(self.DEFAULT_TIMEOUT)
self.addTearDownHook(lambda: self.sock.close())
named_pipe_path = self.getBuildArtifact("stub_port_number")
# Create the named pipe.
os.mkfifo(named_pipe_path)
# Open the read side of the pipe in non-blocking mode. This will
# return right away, ready or not.
named_pipe_fd = os.open(named_pipe_path, os.O_RDONLY | os.O_NONBLOCK)
self.addTearDownHook(lambda: os.close(named_pipe_fd))
args = self.debug_monitor_extra_args
if lldb.remote_platform:
args += ["*:0"]
else:
args += ["localhost:0"]
args += ["--named-pipe", named_pipe_path]
server = self.spawnSubprocess(
self.debug_monitor_exe,
args,
install_remote=False)
(ready_readers, _, _) = select.select(
[named_pipe_fd], [], [], self.DEFAULT_TIMEOUT)
self.assertIsNotNone(
ready_readers,
"write side of pipe has not written anything - stub isn't writing to pipe.")
port = os.read(named_pipe_fd, 10)
# Trim null byte, convert to int
addr = (addr[0], int(port[:-1]))
self.sock.connect(addr)
# Verify we can do the handshake. If that works, we'll call it good.
self.do_handshake(self.sock)

View File

@@ -1,105 +0,0 @@
from __future__ import print_function
import errno
import gdbremote_testcase
import lldbgdbserverutils
import re
import select
import socket
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
from lldbsuite.test import lldbutil
class TestStubReverseConnect(gdbremote_testcase.GdbRemoteTestCaseBase):
mydir = TestBase.compute_mydir(__file__)
def setUp(self):
# Set up the test.
gdbremote_testcase.GdbRemoteTestCaseBase.setUp(self)
# Create a listener on a local port.
self.listener_socket = self.create_listener_socket()
self.assertIsNotNone(self.listener_socket)
self.listener_port = self.listener_socket.getsockname()[1]
def create_listener_socket(self):
try:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
except OSError as e:
if e.errno != errno.EAFNOSUPPORT:
raise
sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
self.assertIsNotNone(sock)
sock.settimeout(self.DEFAULT_TIMEOUT)
if sock.family == socket.AF_INET:
bind_addr = ("127.0.0.1", 0)
elif sock.family == socket.AF_INET6:
bind_addr = ("::1", 0)
sock.bind(bind_addr)
sock.listen(1)
def tear_down_listener():
try:
sock.shutdown(socket.SHUT_RDWR)
except:
# ignore
None
self.addTearDownHook(tear_down_listener)
return sock
def reverse_connect_works(self):
# Indicate stub startup should do a reverse connect.
appended_stub_args = ["--reverse-connect"]
if self.debug_monitor_extra_args:
self.debug_monitor_extra_args += appended_stub_args
else:
self.debug_monitor_extra_args = appended_stub_args
self.stub_hostname = "127.0.0.1"
self.port = self.listener_port
triple = self.dbg.GetSelectedPlatform().GetTriple()
if re.match(".*-.*-.*-android", triple):
self.forward_adb_port(
self.port,
self.port,
"reverse",
self.stub_device)
# Start the stub.
server = self.launch_debug_monitor(logfile=sys.stdout)
self.assertIsNotNone(server)
self.assertTrue(
lldbgdbserverutils.process_is_running(
server.pid, True))
# Listen for the stub's connection to us.
(stub_socket, address) = self.listener_socket.accept()
self.assertIsNotNone(stub_socket)
self.assertIsNotNone(address)
print("connected to stub {} on {}".format(
address, stub_socket.getsockname()))
# Verify we can do the handshake. If that works, we'll call it good.
self.do_handshake(stub_socket)
# Clean up.
stub_socket.shutdown(socket.SHUT_RDWR)
@debugserver_test
@skipIfDarwinEmbedded # <rdar://problem/34539270> lldb-server tests not updated to work on ios etc yet
def test_reverse_connect_works_debugserver(self):
self.init_debugserver_test(use_named_pipe=False)
self.set_inferior_startup_launch()
self.reverse_connect_works()
@llgs_test
@skipIfRemote # reverse connect is not a supported use case for now
def test_reverse_connect_works_llgs(self):
self.init_llgs_test(use_named_pipe=False)
self.set_inferior_startup_launch()
self.reverse_connect_works()