[lldb] Use reverse connection method for lldb-server tests

This fixes an flakyness is all gdb-remote tests. These tests have been (mildly) flaky since we started using "localhost" instead of 127.0.0.1 in the test suite. The reason is that lldb-server needs to create two sockets (v4 and v6) to listen for localhost connections. The algorithm it uses first tries to select a random port (bind(localhost:0)) for the first address, and then bind the same port for the second one. The creating of the second socket can fail as there's no guarantee that port will be available -- it seems that the (linux) kernel tries to choose an unused port for the first socket (I've had to create thousands of sockets to reproduce this reliably), but this can apparantly fail when the system is under load (and our test suite creates a _lot_ of sockets). The socket creationg operation is considered successful if it creates at least one socket is created, but the test harness has no way of knowing which one it is, so it can end up connecting to the wrong address. I'm not aware of a way to atomically create two sockets bound to the same port. One way to fix this would be to make lldb-server report the address is it listening on instead of just the port. However, this would be a breaking change and it's not clear to me that's worth it (the algorithm works pretty well under normal circumstances). Instead, this patch sidesteps that problem by using "reverse" connections. This way, the test harness is responsible for creating the listening socket so it can pass the address that it has managed to open. It also results in much simpler code overall. To preserve test coverage for the named pipe method, I've moved the relevant code to a dedicated test. To avoid original problem, this test passes raw addresses (as obtained by getaddrinfo(localhost)) instead of "localhost". Differential Revision: https://reviews.llvm.org/D90313
2020-10-28 14:59:14 +01:00
parent d6d6fdb068
commit 8cc49bec2e
3 changed files with 122 additions and 212 deletions
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py
@@ -100,9 +100,6 @@ class GdbRemoteTestCaseBase(TestBase):
        self.test_sequence = GdbRemoteTestSequence(self.logger)
        self.set_inferior_startup_launch()
        self.port = self.get_next_port()
-        self.named_pipe_path = None
-        self.named_pipe = None
-        self.named_pipe_fd = None
        self.stub_sends_two_stop_notifications_on_kill = False
        if configuration.lldb_platform_url:
            if configuration.lldb_platform_url.startswith('unix-'):
@@ -154,87 +151,12 @@ class GdbRemoteTestCaseBase(TestBase):
    def reset_test_sequence(self):
        self.test_sequence = GdbRemoteTestSequence(self.logger)

-    def create_named_pipe(self):
-        # Create a temp dir and name for a pipe.
-        temp_dir = tempfile.mkdtemp()
-        named_pipe_path = os.path.join(temp_dir, "stub_port_number")

-        # Create the named pipe.
-        os.mkfifo(named_pipe_path)
-
-        # Open the read side of the pipe in non-blocking mode.  This will
-        # return right away, ready or not.
-        named_pipe_fd = os.open(named_pipe_path, os.O_RDONLY | os.O_NONBLOCK)
-
-        # Create the file for the named pipe.  Note this will follow semantics of
-        # a non-blocking read side of a named pipe, which has different semantics
-        # than a named pipe opened for read in non-blocking mode.
-        named_pipe = os.fdopen(named_pipe_fd, "r")
-        self.assertIsNotNone(named_pipe)
-
-        def shutdown_named_pipe():
-            # Close the pipe.
-            try:
-                named_pipe.close()
-            except:
-                print("failed to close named pipe")
-                None
-
-            # Delete the pipe.
-            try:
-                os.remove(named_pipe_path)
-            except:
-                print("failed to delete named pipe: {}".format(named_pipe_path))
-                None
-
-            # Delete the temp directory.
-            try:
-                os.rmdir(temp_dir)
-            except:
-                print(
-                    "failed to delete temp dir: {}, directory contents: '{}'".format(
-                        temp_dir, os.listdir(temp_dir)))
-                None
-
-        # Add the shutdown hook to clean up the named pipe.
-        self.addTearDownHook(shutdown_named_pipe)
-
-        # Clear the port so the stub selects a port number.
-        self.port = 0
-
-        return (named_pipe_path, named_pipe, named_pipe_fd)
-
-    def get_stub_port_from_named_socket(self):
-        # Wait for something to read with a max timeout.
-        (ready_readers, _, _) = select.select(
-            [self.named_pipe_fd], [], [], self.DEFAULT_TIMEOUT)
-        self.assertIsNotNone(
-            ready_readers,
-            "write side of pipe has not written anything - stub isn't writing to pipe.")
-        self.assertNotEqual(
-            len(ready_readers),
-            0,
-            "write side of pipe has not written anything - stub isn't writing to pipe.")
-
-        # Read the port from the named pipe.
-        stub_port_raw = self.named_pipe.read()
-        self.assertIsNotNone(stub_port_raw)
-        self.assertNotEqual(
-            len(stub_port_raw),
-            0,
-            "no content to read on pipe")
-
-        # Trim null byte, convert to int.
-        stub_port_raw = stub_port_raw[:-1]
-        stub_port = int(stub_port_raw)
-        self.assertTrue(stub_port > 0)
-
-        return stub_port
-
-    def init_llgs_test(self, use_named_pipe=True):
+    def init_llgs_test(self):
+        reverse_connect = True
        if lldb.remote_platform:
-            # Remote platforms don't support named pipe based port negotiation
-            use_named_pipe = False
+            # Reverse connections may be tricky due to firewalls/NATs.
+            reverse_connect = False

            triple = self.dbg.GetSelectedPlatform().GetTriple()
            if re.match(".*-.*-windows", triple):
@@ -265,9 +187,9 @@ class GdbRemoteTestCaseBase(TestBase):
            # Remove if it's there.
            self.debug_monitor_exe = re.sub(r' \(deleted\)$', '', exe)
        else:
-            # Need to figure out how to create a named pipe on Windows.
+            # TODO: enable this
            if platform.system() == 'Windows':
-                use_named_pipe = False
+                reverse_connect = False

            self.debug_monitor_exe = get_lldb_server_exe()
            if not self.debug_monitor_exe:
@@ -276,18 +198,15 @@ class GdbRemoteTestCaseBase(TestBase):
        self.debug_monitor_extra_args = ["gdbserver"]
        self.setUpServerLogging(is_llgs=True)

-        if use_named_pipe:
-            (self.named_pipe_path, self.named_pipe,
-             self.named_pipe_fd) = self.create_named_pipe()
+        self.reverse_connect = reverse_connect

-    def init_debugserver_test(self, use_named_pipe=True):
+    def init_debugserver_test(self):
        self.debug_monitor_exe = get_debugserver_exe()
        if not self.debug_monitor_exe:
            self.skipTest("debugserver exe not found")
        self.setUpServerLogging(is_llgs=False)
-        if use_named_pipe:
-            (self.named_pipe_path, self.named_pipe,
-             self.named_pipe_fd) = self.create_named_pipe()
+        self.reverse_connect = True
+
        # The debugserver stub has a race on handling the 'k' command, so it sends an X09 right away, then sends the real X notification
        # when the process truly dies.
        self.stub_sends_two_stop_notifications_on_kill = True
@@ -380,17 +299,17 @@ class GdbRemoteTestCaseBase(TestBase):
        self._inferior_startup = self._STARTUP_ATTACH_MANUALLY

    def get_debug_monitor_command_line_args(self, attach_pid=None):
-        if lldb.remote_platform:
-            commandline_args = self.debug_monitor_extra_args + \
-                ["*:{}".format(self.port)]
-        else:
-            commandline_args = self.debug_monitor_extra_args + \
-                ["localhost:{}".format(self.port)]
-
+        commandline_args = self.debug_monitor_extra_args
        if attach_pid:
            commandline_args += ["--attach=%d" % attach_pid]
-        if self.named_pipe_path:
-            commandline_args += ["--named-pipe", self.named_pipe_path]
+        if self.reverse_connect:
+            commandline_args += ["--reverse-connect", self.connect_address]
+        else:
+            if lldb.remote_platform:
+                commandline_args += ["*:{}".format(self.port)]
+            else:
+                commandline_args += ["localhost:{}".format(self.port)]
+
        return commandline_args

    def get_target_byte_order(self):
@@ -399,6 +318,17 @@ class GdbRemoteTestCaseBase(TestBase):
        return target.GetByteOrder()

    def launch_debug_monitor(self, attach_pid=None, logfile=None):
+        if self.reverse_connect:
+            family, type, proto, _, addr = socket.getaddrinfo("localhost", 0, proto=socket.IPPROTO_TCP)[0]
+            sock = socket.socket(family, type, proto)
+            sock.settimeout(self.DEFAULT_TIMEOUT)
+
+            sock.bind(addr)
+            sock.listen(1)
+            addr = sock.getsockname()
+            self.connect_address = "[{}]:{}".format(*addr)
+
+
        # Create the command line.
        commandline_args = self.get_debug_monitor_command_line_args(
            attach_pid=attach_pid)
@@ -410,15 +340,13 @@ class GdbRemoteTestCaseBase(TestBase):
            install_remote=False)
        self.assertIsNotNone(server)

-        # If we're receiving the stub's listening port from the named pipe, do
-        # that here.
-        if self.named_pipe:
-            self.port = self.get_stub_port_from_named_socket()
+        if self.reverse_connect:
+            self.sock = sock.accept()[0]

        return server

    def connect_to_debug_monitor(self, attach_pid=None):
-        if self.named_pipe:
+        if self.reverse_connect:
            # Create the stub.
            server = self.launch_debug_monitor(attach_pid=attach_pid)
            self.assertIsNotNone(server)
@@ -426,8 +354,6 @@ class GdbRemoteTestCaseBase(TestBase):
            # Schedule debug monitor to be shut down during teardown.
            logger = self.logger

-            # Attach to the stub and return a socket opened to it.
-            self.sock = self.create_socket()
            return server

        # We're using a random port algorithm to try not to collide with other ports,
--- a/lldb/test/API/tools/lldb-server/commandline/TestGdbRemoteConnection.py
+++ b/lldb/test/API/tools/lldb-server/commandline/TestGdbRemoteConnection.py
@@ -0,0 +1,89 @@
+from __future__ import print_function
+
+import gdbremote_testcase
+import select
+import socket
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+
+
+class TestGdbRemoteConnection(gdbremote_testcase.GdbRemoteTestCaseBase):
+
+    mydir = TestBase.compute_mydir(__file__)
+
+    @debugserver_test
+    # <rdar://problem/34539270> lldb-server tests not updated to work on ios etc yet
+    @skipIfDarwinEmbedded
+    def test_reverse_connect_debugserver(self):
+        self.init_debugserver_test()
+        self._reverse_connect()
+
+    @llgs_test
+    @skipIfRemote  # reverse connect is not a supported use case for now
+    def test_reverse_connect_llgs(self):
+        self.init_llgs_test()
+        self._reverse_connect()
+
+    def _reverse_connect(self):
+        # Reverse connect is the default connection method.
+        self.connect_to_debug_monitor()
+        # Verify we can do the handshake.  If that works, we'll call it good.
+        self.do_handshake(self.sock)
+
+    @debugserver_test
+    @skipIfRemote
+    def test_named_pipe_debugserver(self):
+        self.init_debugserver_test()
+        self._named_pipe()
+
+    @llgs_test
+    @skipIfRemote
+    @skipIfWindows
+    def test_named_pipe_llgs(self):
+        self.init_llgs_test()
+        self._named_pipe()
+
+    def _named_pipe(self):
+        family, type, proto, _, addr = socket.getaddrinfo(
+            self.stub_hostname, 0, proto=socket.IPPROTO_TCP)[0]
+        self.sock = socket.socket(family, type, proto)
+        self.sock.settimeout(self.DEFAULT_TIMEOUT)
+
+        self.addTearDownHook(lambda: self.sock.close())
+
+        named_pipe_path = self.getBuildArtifact("stub_port_number")
+
+        # Create the named pipe.
+        os.mkfifo(named_pipe_path)
+
+        # Open the read side of the pipe in non-blocking mode.  This will
+        # return right away, ready or not.
+        named_pipe_fd = os.open(named_pipe_path, os.O_RDONLY | os.O_NONBLOCK)
+
+        self.addTearDownHook(lambda: os.close(named_pipe_fd))
+
+        args = self.debug_monitor_extra_args
+        if lldb.remote_platform:
+            args += ["*:0"]
+        else:
+            args += ["localhost:0"]
+
+        args += ["--named-pipe", named_pipe_path]
+
+        server = self.spawnSubprocess(
+            self.debug_monitor_exe,
+            args,
+            install_remote=False)
+
+        (ready_readers, _, _) = select.select(
+            [named_pipe_fd], [], [], self.DEFAULT_TIMEOUT)
+        self.assertIsNotNone(
+            ready_readers,
+            "write side of pipe has not written anything - stub isn't writing to pipe.")
+        port = os.read(named_pipe_fd, 10)
+        # Trim null byte, convert to int
+        addr = (addr[0], int(port[:-1]))
+        self.sock.connect(addr)
+
+        # Verify we can do the handshake.  If that works, we'll call it good.
+        self.do_handshake(self.sock)
--- a/lldb/test/API/tools/lldb-server/commandline/TestStubReverseConnect.py
+++ b/lldb/test/API/tools/lldb-server/commandline/TestStubReverseConnect.py
@@ -1,105 +0,0 @@
-from __future__ import print_function
-
-import errno
-import gdbremote_testcase
-import lldbgdbserverutils
-import re
-import select
-import socket
-from lldbsuite.test.decorators import *
-from lldbsuite.test.lldbtest import *
-from lldbsuite.test import lldbutil
-
-
-class TestStubReverseConnect(gdbremote_testcase.GdbRemoteTestCaseBase):
-
-    mydir = TestBase.compute_mydir(__file__)
-
-    def setUp(self):
-        # Set up the test.
-        gdbremote_testcase.GdbRemoteTestCaseBase.setUp(self)
-
-        # Create a listener on a local port.
-        self.listener_socket = self.create_listener_socket()
-        self.assertIsNotNone(self.listener_socket)
-        self.listener_port = self.listener_socket.getsockname()[1]
-
-    def create_listener_socket(self):
-        try:
-            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-        except OSError as e:
-            if e.errno != errno.EAFNOSUPPORT:
-                raise
-            sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-        self.assertIsNotNone(sock)
-
-        sock.settimeout(self.DEFAULT_TIMEOUT)
-        if sock.family == socket.AF_INET:
-            bind_addr = ("127.0.0.1", 0)
-        elif sock.family == socket.AF_INET6:
-            bind_addr = ("::1", 0)
-        sock.bind(bind_addr)
-        sock.listen(1)
-
-        def tear_down_listener():
-            try:
-                sock.shutdown(socket.SHUT_RDWR)
-            except:
-                # ignore
-                None
-
-        self.addTearDownHook(tear_down_listener)
-        return sock
-
-    def reverse_connect_works(self):
-        # Indicate stub startup should do a reverse connect.
-        appended_stub_args = ["--reverse-connect"]
-        if self.debug_monitor_extra_args:
-            self.debug_monitor_extra_args += appended_stub_args
-        else:
-            self.debug_monitor_extra_args = appended_stub_args
-
-        self.stub_hostname = "127.0.0.1"
-        self.port = self.listener_port
-
-        triple = self.dbg.GetSelectedPlatform().GetTriple()
-        if re.match(".*-.*-.*-android", triple):
-            self.forward_adb_port(
-                self.port,
-                self.port,
-                "reverse",
-                self.stub_device)
-
-        # Start the stub.
-        server = self.launch_debug_monitor(logfile=sys.stdout)
-        self.assertIsNotNone(server)
-        self.assertTrue(
-            lldbgdbserverutils.process_is_running(
-                server.pid, True))
-
-        # Listen for the stub's connection to us.
-        (stub_socket, address) = self.listener_socket.accept()
-        self.assertIsNotNone(stub_socket)
-        self.assertIsNotNone(address)
-        print("connected to stub {} on {}".format(
-            address, stub_socket.getsockname()))
-
-        # Verify we can do the handshake.  If that works, we'll call it good.
-        self.do_handshake(stub_socket)
-
-        # Clean up.
-        stub_socket.shutdown(socket.SHUT_RDWR)
-
-    @debugserver_test
-    @skipIfDarwinEmbedded # <rdar://problem/34539270> lldb-server tests not updated to work on ios etc yet
-    def test_reverse_connect_works_debugserver(self):
-        self.init_debugserver_test(use_named_pipe=False)
-        self.set_inferior_startup_launch()
-        self.reverse_connect_works()
-
-    @llgs_test
-    @skipIfRemote  # reverse connect is not a supported use case for now
-    def test_reverse_connect_works_llgs(self):
-        self.init_llgs_test(use_named_pipe=False)
-        self.set_inferior_startup_launch()
-        self.reverse_connect_works()