[1/3,gdb/contrib] Add spellcheck.sh
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gdb_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gdb_check--master-arm |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 |
success
|
Test passed
|
Commit Message
I came across a table containing common misspellings [1], and wrote a script to
detect and correct these misspellings.
The table also contains entries that have alternatives, like this:
...
addres->address, adders
...
and for those the script prints a TODO instead.
The script downloads the webpage containing the table, extracts the table and
caches it in .git/wikipedia-common-misspellings.txt to prevent downloading it
over and over again.
Example usage:
...
$ gdb/contrib/spellcheck.sh gdb*
...
ChangeLog files are silently skipped.
Checked with shellcheck.
Tested on x86_64-linux, by running it on the gdb* dirs on doing a build and
test run.
The results of running it are in the two following patches.
[1] https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
---
gdb/contrib/spellcheck.sh | 287 ++++++++++++++++++++++++++++++++++++++
1 file changed, 287 insertions(+)
create mode 100755 gdb/contrib/spellcheck.sh
base-commit: 4eb048d448835e9a612643858b2ec49c6b520b65
Comments
Tom de Vries <tdevries@suse.de> writes:
> I came across a table containing common misspellings [1], and wrote a script to
> detect and correct these misspellings.
>
> The table also contains entries that have alternatives, like this:
> ...
> addres->address, adders
> ...
> and for those the script prints a TODO instead.
>
> The script downloads the webpage containing the table, extracts the table and
> caches it in .git/wikipedia-common-misspellings.txt to prevent downloading it
> over and over again.
>
> Example usage:
> ...
> $ gdb/contrib/spellcheck.sh gdb*
> ...
>
> ChangeLog files are silently skipped.
>
> Checked with shellcheck.
>
> Tested on x86_64-linux, by running it on the gdb* dirs on doing a build and
> test run.
>
> The results of running it are in the two following patches.
LGTM. Giving a reviewed-by tag in case Eli wants to comment on the doc
changes. But I looked through everything and it seemed like a positive
change.
Reviewed-By: Andrew Burgess <aburgess@redhat.com>
Thanks,
Andrew
>
> [1] https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
> ---
> gdb/contrib/spellcheck.sh | 287 ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 287 insertions(+)
> create mode 100755 gdb/contrib/spellcheck.sh
>
> diff --git a/gdb/contrib/spellcheck.sh b/gdb/contrib/spellcheck.sh
> new file mode 100755
> index 00000000000..e7db6217d45
> --- /dev/null
> +++ b/gdb/contrib/spellcheck.sh
> @@ -0,0 +1,287 @@
> +#!/bin/bash
> +
> +# Copyright (C) 2024 Free Software Foundation, Inc.
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 3 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program. If not, see <http://www.gnu.org/licenses/>.
> +
> +# Script to auto-correct common spelling mistakes.
> +#
> +# Example usage:
> +# $ ./gdb/contrib/spellcheck.sh gdb*
> +
> +scriptdir=$(cd "$(dirname "$0")" || exit; pwd -P)
> +
> +url=https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
> +cache_dir=$scriptdir/../../.git
> +cache_file=wikipedia-common-misspellings.txt
> +dictionary=$cache_dir/$cache_file
> +
> +# Separators: space, slash, tab.
> +grep_separator=" |/| "
> +sed_separator=" \|/\|\t"
> +
> +usage ()
> +{
> + echo "usage: $(basename "$0") <file|dir>+"
> +}
> +
> +make_absolute ()
> +{
> + local arg
> + arg="$1"
> +
> + case "$arg" in
> + /*)
> + ;;
> + *)
> + arg=$(pwd -P)/"$arg"
> + ;;
> + esac
> +
> + echo "$arg"
> +}
> +
> +parse_args ()
> +{
> + local files
> + files=$(mktemp)
> + trap 'rm -f "$files"' EXIT
> +
> + if [ $# -eq -0 ]; then
> + usage
> + exit 1
> + fi
> +
> + local arg
> + for arg in "$@"; do
> + if [ -f "$arg" ]; then
> + arg=$(make_absolute "$arg")
> + readlink -e "$arg" \
> + >> "$files"
> + elif [ -d "$arg" ]; then
> + arg=$(make_absolute "$arg")
> + local f
> + find "$arg" -type f -exec readlink -e {} \; \
> + >> "$files"
> + else
> + echo "Not a file or directory: $arg"
> + exit 1
> + fi
> + done
> +
> + mapfile -t unique_files \
> + < <(sort -u "$files" \
> + | grep -v ChangeLog)
> +
> + rm -f "$files"
> + trap "" EXIT
> +}
> +
> +get_dictionary ()
> +{
> + if [ -f "$dictionary" ]; then
> + return
> + fi
> +
> + local webpage
> + webpage=$(mktemp)
> + trap 'rm -f "$webpage"' EXIT
> +
> + # Download web page containing table.
> + wget $url -O "$webpage"
> +
> + # Extract table from web page.
> + awk '/<pre>/,/<\/pre>/' "$webpage" \
> + | sed 's/<pre>//;s/<\/pre>//' \
> + | grep -E -v "^$" \
> + > "$dictionary"
> +
> + rm -f "$webpage"
> + trap "" EXIT
> +}
> +
> +parse_dictionary ()
> +{
> + # Parse dictionary.
> + mapfile -t words \
> + < <(awk -F '->' '{print $1}' "$dictionary")
> + mapfile -t replacements \
> + < <(awk -F '->' '{print $2}' "$dictionary")
> +}
> +
> +find_files_matching_words ()
> +{
> + local pat
> + pat=""
> + for word in "${words[@]}"; do
> + if [ "$pat" = "" ]; then
> + pat="$word"
> + else
> + pat="$pat|$word"
> + fi
> + done
> + pat="($pat)"
> +
> + local sep
> + sep=$grep_separator
> +
> + pat="(^|$sep)$pat($sep|$)"
> +
> + grep -E \
> + -l \
> + "$pat" \
> + "$@"
> +}
> +
> +find_files_matching_word ()
> +{
> + local pat
> + pat="$1"
> + shift
> +
> + local sep
> + sep=$grep_separator
> +
> + pat="(^|$sep)$pat($sep|$)"
> +
> + grep -E \
> + -l \
> + "$pat" \
> + "$@"
> +}
> +
> +replace_word_in_file ()
> +{
> + local word
> + word="$1"
> +
> + local replacement
> + replacement="$2"
> +
> + local file
> + file="$3"
> +
> + local sep
> + sep=$sed_separator
> +
> + # Save separator.
> + sep="\($sep\)"
> +
> + local repl1 repl2 repl3
> +
> + repl1="s%$sep$word$sep%\1$replacement\2%g"
> +
> + repl2="s%^$word$sep%$replacement\1%"
> +
> + repl3="s%$sep$word$%\1$replacement%"
> +
> + sed -i \
> + "$repl1;$repl2;$repl3" \
> + "$file"
> +}
> +
> +replace_word_in_files ()
> +{
> + local word
> + word="$1"
> +
> + local replacement
> + replacement="$2"
> +
> + shift 2
> +
> + local id
> + id="$word -> $replacement"
> +
> + # Reduce set of files for sed to operate on.
> + local files_matching_word
> + declare -a files_matching_word
> + mapfile -t files_matching_word \
> + < <(find_files_matching_word "$word" "$@")
> +
> + if [ ${#files_matching_word[@]} -eq 0 ]; then
> + return
> + fi
> +
> + if echo "$replacement"| grep -q ","; then
> + echo "TODO: $id"
> + return
> + fi
> +
> + declare -A md5sums
> +
> + local changed f before after
> + changed=false
> + for f in "${files_matching_word[@]}"; do
> + if [ "${md5sums[$f]}" = "" ]; then
> + md5sums[$f]=$(md5sum "$f")
> + fi
> +
> + before="${md5sums[$f]}"
> +
> + replace_word_in_file \
> + "$word" \
> + "$replacement" \
> + "$f"
> +
> + after=$(md5sum "$f")
> +
> + if [ "$after" != "$before" ]; then
> + md5sums[$f]="$after"
> + changed=true
> + fi
> + done
> +
> + if $changed; then
> + echo "$id"
> + fi
> +
> + find_files_matching_word "$word" "${files_matching_word[@]}" \
> + | awk "{ printf \"TODO: $id: replacement failed: %s\n\", \$0}"
> +}
> +
> +main ()
> +{
> + declare -a unique_files
> + parse_args "$@"
> +
> + get_dictionary
> +
> + declare -a words
> + declare -a replacements
> + parse_dictionary
> +
> + # Reduce set of files for sed to operate on.
> + local files_matching_words
> + declare -a files_matching_words
> + mapfile -t files_matching_words \
> + < <(find_files_matching_words "${unique_files[@]}")
> +
> + if [ ${#files_matching_words[@]} -eq 0 ]; then
> + return
> + fi
> +
> + local i word replacement
> + i=0
> + for word in "${words[@]}"; do
> + replacement=${replacements[$i]}
> + i=$((i + 1))
> +
> + replace_word_in_files \
> + "$word" \
> + "$replacement" \
> + "${files_matching_words[@]}"
> + done
> +}
> +
> +main "$@"
>
> base-commit: 4eb048d448835e9a612643858b2ec49c6b520b65
> --
> 2.35.3
>>>>> "Andrew" == Andrew Burgess <aburgess@redhat.com> writes:
Andrew> LGTM. Giving a reviewed-by tag in case Eli wants to comment on the doc
Andrew> changes. But I looked through everything and it seemed like a positive
Andrew> change.
Andrew> Reviewed-By: Andrew Burgess <aburgess@redhat.com>
Looks good to me as well.
Approved-By: Tom Tromey <tom@tromey.com>
Tom
new file mode 100755
@@ -0,0 +1,287 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Free Software Foundation, Inc.
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# Script to auto-correct common spelling mistakes.
+#
+# Example usage:
+# $ ./gdb/contrib/spellcheck.sh gdb*
+
+scriptdir=$(cd "$(dirname "$0")" || exit; pwd -P)
+
+url=https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
+cache_dir=$scriptdir/../../.git
+cache_file=wikipedia-common-misspellings.txt
+dictionary=$cache_dir/$cache_file
+
+# Separators: space, slash, tab.
+grep_separator=" |/| "
+sed_separator=" \|/\|\t"
+
+usage ()
+{
+ echo "usage: $(basename "$0") <file|dir>+"
+}
+
+make_absolute ()
+{
+ local arg
+ arg="$1"
+
+ case "$arg" in
+ /*)
+ ;;
+ *)
+ arg=$(pwd -P)/"$arg"
+ ;;
+ esac
+
+ echo "$arg"
+}
+
+parse_args ()
+{
+ local files
+ files=$(mktemp)
+ trap 'rm -f "$files"' EXIT
+
+ if [ $# -eq -0 ]; then
+ usage
+ exit 1
+ fi
+
+ local arg
+ for arg in "$@"; do
+ if [ -f "$arg" ]; then
+ arg=$(make_absolute "$arg")
+ readlink -e "$arg" \
+ >> "$files"
+ elif [ -d "$arg" ]; then
+ arg=$(make_absolute "$arg")
+ local f
+ find "$arg" -type f -exec readlink -e {} \; \
+ >> "$files"
+ else
+ echo "Not a file or directory: $arg"
+ exit 1
+ fi
+ done
+
+ mapfile -t unique_files \
+ < <(sort -u "$files" \
+ | grep -v ChangeLog)
+
+ rm -f "$files"
+ trap "" EXIT
+}
+
+get_dictionary ()
+{
+ if [ -f "$dictionary" ]; then
+ return
+ fi
+
+ local webpage
+ webpage=$(mktemp)
+ trap 'rm -f "$webpage"' EXIT
+
+ # Download web page containing table.
+ wget $url -O "$webpage"
+
+ # Extract table from web page.
+ awk '/<pre>/,/<\/pre>/' "$webpage" \
+ | sed 's/<pre>//;s/<\/pre>//' \
+ | grep -E -v "^$" \
+ > "$dictionary"
+
+ rm -f "$webpage"
+ trap "" EXIT
+}
+
+parse_dictionary ()
+{
+ # Parse dictionary.
+ mapfile -t words \
+ < <(awk -F '->' '{print $1}' "$dictionary")
+ mapfile -t replacements \
+ < <(awk -F '->' '{print $2}' "$dictionary")
+}
+
+find_files_matching_words ()
+{
+ local pat
+ pat=""
+ for word in "${words[@]}"; do
+ if [ "$pat" = "" ]; then
+ pat="$word"
+ else
+ pat="$pat|$word"
+ fi
+ done
+ pat="($pat)"
+
+ local sep
+ sep=$grep_separator
+
+ pat="(^|$sep)$pat($sep|$)"
+
+ grep -E \
+ -l \
+ "$pat" \
+ "$@"
+}
+
+find_files_matching_word ()
+{
+ local pat
+ pat="$1"
+ shift
+
+ local sep
+ sep=$grep_separator
+
+ pat="(^|$sep)$pat($sep|$)"
+
+ grep -E \
+ -l \
+ "$pat" \
+ "$@"
+}
+
+replace_word_in_file ()
+{
+ local word
+ word="$1"
+
+ local replacement
+ replacement="$2"
+
+ local file
+ file="$3"
+
+ local sep
+ sep=$sed_separator
+
+ # Save separator.
+ sep="\($sep\)"
+
+ local repl1 repl2 repl3
+
+ repl1="s%$sep$word$sep%\1$replacement\2%g"
+
+ repl2="s%^$word$sep%$replacement\1%"
+
+ repl3="s%$sep$word$%\1$replacement%"
+
+ sed -i \
+ "$repl1;$repl2;$repl3" \
+ "$file"
+}
+
+replace_word_in_files ()
+{
+ local word
+ word="$1"
+
+ local replacement
+ replacement="$2"
+
+ shift 2
+
+ local id
+ id="$word -> $replacement"
+
+ # Reduce set of files for sed to operate on.
+ local files_matching_word
+ declare -a files_matching_word
+ mapfile -t files_matching_word \
+ < <(find_files_matching_word "$word" "$@")
+
+ if [ ${#files_matching_word[@]} -eq 0 ]; then
+ return
+ fi
+
+ if echo "$replacement"| grep -q ","; then
+ echo "TODO: $id"
+ return
+ fi
+
+ declare -A md5sums
+
+ local changed f before after
+ changed=false
+ for f in "${files_matching_word[@]}"; do
+ if [ "${md5sums[$f]}" = "" ]; then
+ md5sums[$f]=$(md5sum "$f")
+ fi
+
+ before="${md5sums[$f]}"
+
+ replace_word_in_file \
+ "$word" \
+ "$replacement" \
+ "$f"
+
+ after=$(md5sum "$f")
+
+ if [ "$after" != "$before" ]; then
+ md5sums[$f]="$after"
+ changed=true
+ fi
+ done
+
+ if $changed; then
+ echo "$id"
+ fi
+
+ find_files_matching_word "$word" "${files_matching_word[@]}" \
+ | awk "{ printf \"TODO: $id: replacement failed: %s\n\", \$0}"
+}
+
+main ()
+{
+ declare -a unique_files
+ parse_args "$@"
+
+ get_dictionary
+
+ declare -a words
+ declare -a replacements
+ parse_dictionary
+
+ # Reduce set of files for sed to operate on.
+ local files_matching_words
+ declare -a files_matching_words
+ mapfile -t files_matching_words \
+ < <(find_files_matching_words "${unique_files[@]}")
+
+ if [ ${#files_matching_words[@]} -eq 0 ]; then
+ return
+ fi
+
+ local i word replacement
+ i=0
+ for word in "${words[@]}"; do
+ replacement=${replacements[$i]}
+ i=$((i + 1))
+
+ replace_word_in_files \
+ "$word" \
+ "$replacement" \
+ "${files_matching_words[@]}"
+ done
+}
+
+main "$@"