From patchwork Mon Nov 20 00:41:54 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Pedro Alves X-Patchwork-Id: 24363 Received: (qmail 82228 invoked by alias); 20 Nov 2017 00:42:04 -0000 Mailing-List: contact gdb-patches-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: gdb-patches-owner@sourceware.org Delivered-To: mailing list gdb-patches@sourceware.org Received: (qmail 82186 invoked by uid 89); 20 Nov 2017 00:42:03 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-25.7 required=5.0 tests=BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_LAZY_DOMAIN_SECURITY, KB_WAM_FROM_NAME_SINGLEWORD, SPF_HELO_PASS, T_RP_MATCHES_RCVD autolearn=ham version=3.3.2 spammy=one's, duplicates X-HELO: mx1.redhat.com Received: from mx1.redhat.com (HELO mx1.redhat.com) (209.132.183.28) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Mon, 20 Nov 2017 00:42:00 +0000 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 4AB895F73C; Mon, 20 Nov 2017 00:41:58 +0000 (UTC) Received: from cascais.lan (ovpn04.gateway.prod.ext.ams2.redhat.com [10.39.146.4]) by smtp.corp.redhat.com (Postfix) with ESMTP id 73546600C8; Mon, 20 Nov 2017 00:41:57 +0000 (UTC) From: Pedro Alves To: gdb-patches@sourceware.org Cc: Simon Marchi Subject: [PATCH 2/3] Unit test name-component bounds searching directly Date: Mon, 20 Nov 2017 00:41:54 +0000 Message-Id: <1511138515-25996-2-git-send-email-palves@redhat.com> In-Reply-To: <5d721d13-d886-0400-db6b-76485c545142@redhat.com> References: <5d721d13-d886-0400-db6b-76485c545142@redhat.com> MIME-Version: 1.0 This commit factors out the name-components-vector building and bounds searching out of dw2_expand_symtabs_matching_symbol into separate functions, and adds unit tests that: - expose both the latent bug mentioned in the previous commit, and also, - for completeness exercise the 0xff character handling fixed in the previous commit more directly. The actual fix for the now-exposed bug is left for the following patch. gdb/ChangeLog: yyyy-mm-dd Pedro Alves * dwarf2read.c (mapped_index::name_components_casing): New field. (mapped_index) name_components; + /* How NAME_COMPONENTS is sorted. */ + enum case_sensitivity name_components_casing; /* Convenience method to get at the name of the symbol at IDX in the symbol table. */ const char *symbol_name_at (offset_type idx) const { return this->constant_pool + MAYBE_SWAP (this->symbol_table[idx]); } + + /* Build the symbol name component sorted vector, if we haven't + yet. */ + void build_name_components (); + + /* Returns the lower (inclusive) and upper (exclusive) bounds of the + possible matches for LN_NO_PARAMS in the name component + vector. */ + std::pair::const_iterator, + std::vector::const_iterator> + find_name_components_bounds (const lookup_name_info &ln_no_params) const; }; typedef struct dwarf2_per_cu_data *dwarf2_per_cu_ptr; @@ -4242,80 +4255,15 @@ make_sort_after_prefix_name (const char *search_name) return after; } -/* Helper for dw2_expand_symtabs_matching that works with a - mapped_index instead of the containing objfile. This is split to a - separate function in order to be able to unit test the - name_components matching using a mock mapped_index. For each - symbol name that matches, calls MATCH_CALLBACK, passing it the - symbol's index in the mapped_index symbol table. */ +/* See declaration. */ -static void -dw2_expand_symtabs_matching_symbol - (mapped_index &index, - const lookup_name_info &lookup_name_in, - gdb::function_view symbol_matcher, - enum search_domain kind, - gdb::function_view match_callback) +std::pair::const_iterator, + std::vector::const_iterator> +mapped_index::find_name_components_bounds + (const lookup_name_info &lookup_name_without_params) const { - lookup_name_info lookup_name_without_params - = lookup_name_in.make_ignore_params (); - gdb_index_symbol_name_matcher lookup_name_matcher - (lookup_name_without_params); - - auto *name_cmp = case_sensitivity == case_sensitive_on ? strcmp : strcasecmp; - - /* Build the symbol name component sorted vector, if we haven't yet. - The code below only knows how to break apart components of C++ - symbol names (and other languages that use '::' as - namespace/module separator). If we add support for wild matching - to some language that uses some other operator (E.g., Ada, Go and - D use '.'), then we'll need to try splitting the symbol name - according to that language too. Note that Ada does support wild - matching, but doesn't currently support .gdb_index. */ - if (index.name_components.empty ()) - { - for (size_t iter = 0; iter < index.symbol_table_slots; ++iter) - { - offset_type idx = 2 * iter; - - if (index.symbol_table[idx] == 0 - && index.symbol_table[idx + 1] == 0) - continue; - - const char *name = index.symbol_name_at (idx); - - /* Add each name component to the name component table. */ - unsigned int previous_len = 0; - for (unsigned int current_len = cp_find_first_component (name); - name[current_len] != '\0'; - current_len += cp_find_first_component (name + current_len)) - { - gdb_assert (name[current_len] == ':'); - index.name_components.push_back ({previous_len, idx}); - /* Skip the '::'. */ - current_len += 2; - previous_len = current_len; - } - index.name_components.push_back ({previous_len, idx}); - } - - /* Sort name_components elements by name. */ - auto name_comp_compare = [&] (const name_component &left, - const name_component &right) - { - const char *left_qualified = index.symbol_name_at (left.idx); - const char *right_qualified = index.symbol_name_at (right.idx); - - const char *left_name = left_qualified + left.name_offset; - const char *right_name = right_qualified + right.name_offset; - - return name_cmp (left_name, right_name) < 0; - }; - - std::sort (index.name_components.begin (), - index.name_components.end (), - name_comp_compare); - } + auto *name_cmp + = this->name_components_casing == case_sensitive_on ? strcmp : strcasecmp; const char *cplus = lookup_name_without_params.cplus ().lookup_name ().c_str (); @@ -4325,7 +4273,7 @@ dw2_expand_symtabs_matching_symbol auto lookup_compare_lower = [&] (const name_component &elem, const char *name) { - const char *elem_qualified = index.symbol_name_at (elem.idx); + const char *elem_qualified = this->symbol_name_at (elem.idx); const char *elem_name = elem_qualified + elem.name_offset; return name_cmp (elem_name, name) < 0; }; @@ -4335,18 +4283,18 @@ dw2_expand_symtabs_matching_symbol auto lookup_compare_upper = [&] (const char *name, const name_component &elem) { - const char *elem_qualified = index.symbol_name_at (elem.idx); + const char *elem_qualified = this->symbol_name_at (elem.idx); const char *elem_name = elem_qualified + elem.name_offset; return name_cmp (name, elem_name) < 0; }; - auto begin = index.name_components.begin (); - auto end = index.name_components.end (); + auto begin = this->name_components.begin (); + auto end = this->name_components.end (); /* Find the lower bound. */ auto lower = [&] () { - if (lookup_name_in.completion_mode () && cplus[0] == '\0') + if (lookup_name_without_params.completion_mode () && cplus[0] == '\0') return begin; else return std::lower_bound (begin, end, cplus, lookup_compare_lower); @@ -4355,7 +4303,7 @@ dw2_expand_symtabs_matching_symbol /* Find the upper bound. */ auto upper = [&] () { - if (lookup_name_in.completion_mode ()) + if (lookup_name_without_params.completion_mode ()) { /* In completion mode, we want UPPER to point past all symbols names that have the same prefix. I.e., with @@ -4378,6 +4326,97 @@ dw2_expand_symtabs_matching_symbol return std::upper_bound (lower, end, cplus, lookup_compare_upper); } (); + return {lower, upper}; +} + +/* See declaration. */ + +void +mapped_index::build_name_components () +{ + if (!this->name_components.empty ()) + return; + + this->name_components_casing = case_sensitivity; + auto *name_cmp + = this->name_components_casing == case_sensitive_on ? strcmp : strcasecmp; + + /* The code below only knows how to break apart components of C++ + symbol names (and other languages that use '::' as + namespace/module separator). If we add support for wild matching + to some language that uses some other operator (E.g., Ada, Go and + D use '.'), then we'll need to try splitting the symbol name + according to that language too. Note that Ada does support wild + matching, but doesn't currently support .gdb_index. */ + for (size_t iter = 0; iter < this->symbol_table_slots; ++iter) + { + offset_type idx = 2 * iter; + + if (this->symbol_table[idx] == 0 + && this->symbol_table[idx + 1] == 0) + continue; + + const char *name = this->symbol_name_at (idx); + + /* Add each name component to the name component table. */ + unsigned int previous_len = 0; + for (unsigned int current_len = cp_find_first_component (name); + name[current_len] != '\0'; + current_len += cp_find_first_component (name + current_len)) + { + gdb_assert (name[current_len] == ':'); + this->name_components.push_back ({previous_len, idx}); + /* Skip the '::'. */ + current_len += 2; + previous_len = current_len; + } + this->name_components.push_back ({previous_len, idx}); + } + + /* Sort name_components elements by name. */ + auto name_comp_compare = [&] (const name_component &left, + const name_component &right) + { + const char *left_qualified = this->symbol_name_at (left.idx); + const char *right_qualified = this->symbol_name_at (right.idx); + + const char *left_name = left_qualified + left.name_offset; + const char *right_name = right_qualified + right.name_offset; + + return name_cmp (left_name, right_name) < 0; + }; + + std::sort (this->name_components.begin (), + this->name_components.end (), + name_comp_compare); +} + +/* Helper for dw2_expand_symtabs_matching that works with a + mapped_index instead of the containing objfile. This is split to a + separate function in order to be able to unit test the + name_components matching using a mock mapped_index. For each + symbol name that matches, calls MATCH_CALLBACK, passing it the + symbol's index in the mapped_index symbol table. */ + +static void +dw2_expand_symtabs_matching_symbol + (mapped_index &index, + const lookup_name_info &lookup_name_in, + gdb::function_view symbol_matcher, + enum search_domain kind, + gdb::function_view match_callback) +{ + lookup_name_info lookup_name_without_params + = lookup_name_in.make_ignore_params (); + gdb_index_symbol_name_matcher lookup_name_matcher + (lookup_name_without_params); + + /* Build the symbol name component sorted vector, if we haven't + yet. */ + index.build_name_components (); + + auto bounds = index.find_name_components_bounds (lookup_name_without_params); + /* Now for each symbol name in range, check to see if we have a name match, and if so, call the MATCH_CALLBACK callback. */ @@ -4389,17 +4428,17 @@ dw2_expand_symtabs_matching_symbol indexes that matched in a temporary vector and ignore duplicates. */ std::vector matches; - matches.reserve (std::distance (lower, upper)); + matches.reserve (std::distance (bounds.first, bounds.second)); - for (;lower != upper; ++lower) + for (; bounds.first != bounds.second; ++bounds.first) { - const char *qualified = index.symbol_name_at (lower->idx); + const char *qualified = index.symbol_name_at (bounds.first->idx); if (!lookup_name_matcher.matches (qualified) || (symbol_matcher != NULL && !symbol_matcher (qualified))) continue; - matches.push_back (lower->idx); + matches.push_back (bounds.first->idx); } std::sort (matches.begin (), matches.end ()); @@ -4576,8 +4615,83 @@ static const char *test_symbols[] = { "\377\377123", }; +/* Returns true if mapped_index::find_name_component_bounds method + finds EXPECTED_SYMS in INDEX when looking for SEARCH_NAME, in + completion mode. */ + +static bool +check_find_bounds_finds (mapped_index &index, + const char *search_name, + gdb::array_view expected_syms) +{ + lookup_name_info lookup_name (search_name, + symbol_name_match_type::FULL, true); + + auto bounds = index.find_name_components_bounds (lookup_name); + + size_t distance = std::distance (bounds.first, bounds.second); + if (distance != expected_syms.size ()) + return false; + + for (size_t exp_elem = 0; exp_elem < distance; exp_elem++) + { + auto nc_elem = bounds.first + exp_elem; + const char *qualified = index.symbol_name_at (nc_elem->idx); + if (strcmp (qualified, expected_syms[exp_elem]) != 0) + return false; + } + + return true; +} + +/* Test the lower-level mapped_index::find_name_component_bounds + method. */ + static void -run_test () +test_mapped_index_find_name_component_bounds () +{ + mock_mapped_index mock_index (test_symbols); + + mock_index.index ().build_name_components (); + + /* Test the lower-level find_bounds function in completion mode. */ + { + static const char *expected_syms[] = { + "t1_func", + "t1_func1", + "t1_fund", /* This one's incorrect. */ + }; + + SELF_CHECK (check_find_bounds_finds (mock_index.index (), + "t1_func", expected_syms)); + } + + /* Check that the increment-last-char in the name matching algorithm + for completion doesn't get confused with ANSI-1 'ÿ' / 0xff. */ + { + static const char *expected_syms[] = { + "\377", + "\377\377123", + }; + + SELF_CHECK (check_find_bounds_finds (mock_index.index (), + "\377", expected_syms)); + } + + { + static const char *expected_syms[] = { + "\377\377123", + }; + + SELF_CHECK (check_find_bounds_finds (mock_index.index (), + "\377\377", expected_syms)); + } +} + +/* Test dw2_expand_symtabs_matching_symbol. */ + +static void +test_dw2_expand_symtabs_matching_symbol () { mock_mapped_index mock_index (test_symbols); @@ -4734,6 +4848,13 @@ run_test () #undef CHECK_MATCH } +static void +run_test () +{ + test_mapped_index_find_name_component_bounds (); + test_dw2_expand_symtabs_matching_symbol (); +} + }} // namespace selftests::dw2_expand_symtabs_matching #endif /* GDB_SELF_TEST */