# frozen_string_literal: true

# A {regular expression}[https://en.wikipedia.org/wiki/Regular_expression]
# (also called a _regexp_) is a <i>match pattern</i> (also simply called a _pattern_).
#
# A common notation for a regexp uses enclosing slash characters:
#
#   /foo/
#
# A regexp may be applied to a <i>target string</i>;
# the part of the string (if any) that matches the pattern is called a _match_,
# and may be said <i>to match</i>:
#
#   re = /red/
#   re.match?('redirect') # => true   # Match at beginning of target.
#   re.match?('bored')    # => true   # Match at end of target.
#   re.match?('credit')   # => true   # Match within target.
#   re.match?('foo')      # => false  # No match.
#
# == \Regexp Uses
#
# A regexp may be used:
#
# - To extract substrings based on a given pattern:
#
#     re = /foo/              # => /foo/
#     re.match('food')        # => #<MatchData "foo">
#     re.match('good')        # => nil
#
#   See sections {Method match}[rdoc-ref:Regexp@Method+match]
#   and {Operator =~}[rdoc-ref:Regexp@Operator+-3D~].
#
# - To determine whether a string matches a given pattern:
#
#     re.match?('food') # => true
#     re.match?('good') # => false
#
#   See section {Method match?}[rdoc-ref:Regexp@Method+match-3F].
#
# - As an argument for calls to certain methods in other classes and modules;
#   most such methods accept an argument that may be either a string
#   or the (much more powerful) regexp.
#
#   See {Regexp Methods}[rdoc-ref:regexp/methods.rdoc].
#
# == \Regexp Objects
#
# A regexp object has:
#
# - A source; see {Sources}[rdoc-ref:Regexp@Sources].
#
# - Several modes; see {Modes}[rdoc-ref:Regexp@Modes].
#
# - A timeout; see {Timeouts}[rdoc-ref:Regexp@Timeouts].
#
# - An encoding; see {Encodings}[rdoc-ref:Regexp@Encodings].
#
# == Creating a \Regexp
#
# A regular expression may be created with:
#
# - A regexp literal using slash characters
#   (see {Regexp Literals}[rdoc-ref:syntax/literals.rdoc@Regexp+Literals]):
#
#     # This is a very common usage.
#     /foo/ # => /foo/
#
# - A <tt>%r</tt> regexp literal
#   (see {%r: Regexp Literals}[rdoc-ref:syntax/literals.rdoc@25r-3A+Regexp+Literals]):
#
#     # Same delimiter character at beginning and end;
#     # useful for avoiding escaping characters
#     %r/name\/value pair/ # => /name\/value pair/
#     %r:name/value pair:  # => /name\/value pair/
#     %r|name/value pair|  # => /name\/value pair/
#
#     # Certain "paired" characters can be delimiters.
#     %r[foo] # => /foo/
#     %r{foo} # => /foo/
#     %r(foo) # => /foo/
#     %r<foo> # => /foo/
#
# - \Method Regexp.new.
#
# == \Method <tt>match</tt>
#
# Each of the methods Regexp#match, String#match, and Symbol#match
# returns a MatchData object if a match was found, +nil+ otherwise;
# each also sets {global variables}[rdoc-ref:Regexp@Global+Variables]:
#
#   'food'.match(/foo/) # => #<MatchData "foo">
#   'food'.match(/bar/) # => nil
#
# == Operator <tt>=~</tt>
#
# Each of the operators Regexp#=~, String#=~, and Symbol#=~
# returns an integer offset if a match was found, +nil+ otherwise;
# each also sets {global variables}[rdoc-ref:Regexp@Global+Variables]:
#
#   /bar/ =~ 'foo bar' # => 4
#   'foo bar' =~ /bar/ # => 4
#   /baz/ =~ 'foo bar' # => nil
#
# == \Method <tt>match?</tt>
#
# Each of the methods Regexp#match?, String#match?, and Symbol#match?
# returns +true+ if a match was found, +false+ otherwise;
# none sets {global variables}[rdoc-ref:Regexp@Global+Variables]:
#
#   'food'.match?(/foo/) # => true
#   'food'.match?(/bar/) # => false
#
# == Global Variables
#
# Certain regexp-oriented methods assign values to global variables:
#
# - <tt>#match</tt>: see {Method match}[rdoc-ref:Regexp@Method+match].
# - <tt>#=~</tt>: see {Operator =~}[rdoc-ref:Regexp@Operator+-3D~].
#
# The affected global variables are:
#
# - <tt>$~</tt>: Returns a MatchData object, or +nil+.
# - <tt>$&</tt>: Returns the matched part of the string, or +nil+.
# - <tt>$`</tt>: Returns the part of the string to the left of the match, or +nil+.
# - <tt>$'</tt>: Returns the part of the string to the right of the match, or +nil+.
# - <tt>$+</tt>: Returns the last group matched, or +nil+.
# - <tt>$1</tt>, <tt>$2</tt>, etc.: Returns the first, second, etc.,
#   matched group, or +nil+.
#   Note that <tt>$0</tt> is quite different;
#   it returns the name of the currently executing program.
#
# Examples:
#
#   # Matched string, but no matched groups.
#   'foo bar bar baz'.match('bar')
#   $~ # => #<MatchData "bar">
#   $& # => "bar"
#   $` # => "foo "
#   $' # => " bar baz"
#   $+ # => nil
#   $1 # => nil
#
#   # Matched groups.
#   /s(\w{2}).*(c)/.match('haystack')
#   $~ # => #<MatchData "stac" 1:"ta" 2:"c">
#   $& # => "stac"
#   $` # => "hay"
#   $' # => "k"
#   $+ # => "c"
#   $1 # => "ta"
#   $2 # => "c"
#   $3 # => nil
#
#   # No match.
#   'foo'.match('bar')
#   $~ # => nil
#   $& # => nil
#   $` # => nil
#   $' # => nil
#   $+ # => nil
#   $1 # => nil
#
# Note that Regexp#match?, String#match?, and Symbol#match?
# do not set global variables.
#
# == Sources
#
# As seen above, the simplest regexp uses a literal expression as its source:
#
#   re = /foo/              # => /foo/
#   re.match('food')        # => #<MatchData "foo">
#   re.match('good')        # => nil
#
# A rich collection of available _subexpressions_
# gives the regexp great power and flexibility:
#
# - {Special characters}[rdoc-ref:Regexp@Special+Characters]
# - {Source literals}[rdoc-ref:Regexp@Source+Literals]
# - {Character classes}[rdoc-ref:Regexp@Character+Classes]
# - {Shorthand character classes}[rdoc-ref:Regexp@Shorthand+Character+Classes]
# - {Anchors}[rdoc-ref:Regexp@Anchors]
# - {Alternation}[rdoc-ref:Regexp@Alternation]
# - {Quantifiers}[rdoc-ref:Regexp@Quantifiers]
# - {Groups and captures}[rdoc-ref:Regexp@Groups+and+Captures]
# - {Unicode}[rdoc-ref:Regexp@Unicode]
# - {POSIX Bracket Expressions}[rdoc-ref:Regexp@POSIX+Bracket+Expressions]
# - {Comments}[rdoc-ref:Regexp@Comments]
#
# === Special Characters
#
# \Regexp special characters, called _metacharacters_,
# have special meanings in certain contexts;
# depending on the context, these are sometimes metacharacters:
#
#   . ? - + * ^ \ | $ ( ) [ ] { }
#
# To match a metacharacter literally, backslash-escape it:
#
#   # Matches one or more 'o' characters.
#   /o+/.match('foo')  # => #<MatchData "oo">
#   # Would match 'o+'.
#   /o\+/.match('foo') # => nil
#
# To match a backslash literally, backslash-escape it:
#
#   /\./.match('\.')  # => #<MatchData ".">
#   /\\./.match('\.') # => #<MatchData "\\.">
#
# Method Regexp.escape returns an escaped string:
#
#   Regexp.escape('.?-+*^\|$()[]{}')
#   # => "\\.\\?\\-\\+\\*\\^\\\\\\|\\$\\(\\)\\[\\]\\{\\}"
#
# === Source Literals
#
# The source literal largely behaves like a double-quoted string;
# see {String Literals}[rdoc-ref:syntax/literals.rdoc@String+Literals].
#
# In particular, a source literal may contain interpolated expressions:
#
#   s = 'foo'         # => "foo"
#   /#{s}/            # => /foo/
#   /#{s.capitalize}/ # => /Foo/
#   /#{2 + 2}/        # => /4/
#
# There are differences between an ordinary string literal and a source literal;
# see {Shorthand Character Classes}[rdoc-ref:Regexp@Shorthand+Character+Classes].
#
# - <tt>\s</tt> in an ordinary string literal is equivalent to a space character;
#   in a source literal, it's shorthand for matching a whitespace character.
# - In an ordinary string literal, these are (needlessly) escaped characters;
#   in a source literal, they are shorthands for various matching characters:
#
#     \w \W \d \D \h \H \S \R
#
# === Character Classes
#
# A <i>character class</i> is delimited by square brackets;
# it specifies that certain characters match at a given point in the target string:
#
#   # This character class will match any vowel.
#   re = /B[aeiou]rd/
#   re.match('Bird') # => #<MatchData "Bird">
#   re.match('Bard') # => #<MatchData "Bard">
#   re.match('Byrd') # => nil
#
# A character class may contain hyphen characters to specify ranges of characters:
#
#   # These regexps have the same effect.
#   /[abcdef]/.match('foo') # => #<MatchData "f">
#   /[a-f]/.match('foo')    # => #<MatchData "f">
#   /[a-cd-f]/.match('foo') # => #<MatchData "f">
#
# When the first character of a character class is a caret (<tt>^</tt>),
# the sense of the class is inverted: it matches any character _except_ those specified.
#
#   /[^a-eg-z]/.match('f') # => #<MatchData "f">
#
# A character class may contain another character class.
# By itself this isn't useful because <tt>[a-z[0-9]]</tt>
# describes the same set as <tt>[a-z0-9]</tt>.
#
# However, character classes also support the <tt>&&</tt> operator,
# which performs set intersection on its arguments.
# The two can be combined as follows:
#
#   /[a-w&&[^c-g]z]/ # ([a-w] AND ([^c-g] OR z))
#
# This is equivalent to:
#
#     /[abh-w]/
#
# === Shorthand Character Classes
#
# Each of the following metacharacters serves as a shorthand
# for a character class:
#
# - <tt>/./</tt>: Matches any character except a newline:
#
#     /./.match('foo') # => #<MatchData "f">
#     /./.match("\n")  # => nil
#
# - <tt>/./m</tt>: Matches any character, including a newline;
#   see {Multiline Mode}[rdoc-ref:Regexp@Multiline+Mode]:
#
#     /./m.match("\n") # => #<MatchData "\n">
#
# - <tt>/\w/</tt>: Matches a word character: equivalent to <tt>[a-zA-Z0-9_]</tt>:
#
#     /\w/.match(' foo') # => #<MatchData "f">
#     /\w/.match(' _')   # => #<MatchData "_">
#     /\w/.match(' ')    # => nil
#
# - <tt>/\W/</tt>: Matches a non-word character: equivalent to <tt>[^a-zA-Z0-9_]</tt>:
#
#     /\W/.match(' ') # => #<MatchData " ">
#     /\W/.match('_') # => nil
#
# - <tt>/\d/</tt>: Matches a digit character: equivalent to <tt>[0-9]</tt>:
#
#     /\d/.match('THX1138') # => #<MatchData "1">
#     /\d/.match('foo')     # => nil
#
# - <tt>/\D/</tt>: Matches a non-digit character: equivalent to <tt>[^0-9]</tt>:
#
#     /\D/.match('123Jump!') # => #<MatchData "J">
#     /\D/.match('123')      # => nil
#
# - <tt>/\h/</tt>: Matches a hexdigit character: equivalent to <tt>[0-9a-fA-F]</tt>:
#
#     /\h/.match('xyz fedcba9876543210') # => #<MatchData "f">
#     /\h/.match('xyz')                  # => nil
#
# - <tt>/\H/</tt>: Matches a non-hexdigit character: equivalent to <tt>[^0-9a-fA-F]</tt>:
#
#     /\H/.match('fedcba9876543210xyz') # => #<MatchData "x">
#     /\H/.match('fedcba9876543210')    # => nil
#
# - <tt>/\s/</tt>: Matches a whitespace character: equivalent to <tt>/[ \t\r\n\f\v]/</tt>:
#
#     /\s/.match('foo bar') # => #<MatchData " ">
#     /\s/.match('foo')     # => nil
#
# - <tt>/\S/</tt>: Matches a non-whitespace character: equivalent to <tt>/[^ \t\r\n\f\v]/</tt>:
#
#     /\S/.match(" \t\r\n\f\v foo") # => #<MatchData "f">
#     /\S/.match(" \t\r\n\f\v")     # => nil
#
# - <tt>/\R/</tt>: Matches a linebreak, platform-independently:
#
#     /\R/.match("\r")     # => #<MatchData "\r">     # Carriage return (CR)
#     /\R/.match("\n")     # => #<MatchData "\n">     # Newline (LF)
#     /\R/.match("\f")     # => #<MatchData "\f">     # Formfeed (FF)
#     /\R/.match("\v")     # => #<MatchData "\v">     # Vertical tab (VT)
#     /\R/.match("\r\n")   # => #<MatchData "\r\n">   # CRLF
#     /\R/.match("\u0085") # => #<MatchData "\u0085"> # Next line (NEL)
#     /\R/.match("\u2028") # => #<MatchData "\u2028"> # Line separator (LSEP)
#     /\R/.match("\u2029") # => #<MatchData "\u2029"> # Paragraph separator (PSEP)
#
# === Anchors
#
# An anchor is a metasequence that matches a zero-width position between
# characters in the target string.
#
# For a subexpression with no anchor,
# matching may begin anywhere in the target string:
#
#   /real/.match('surrealist') # => #<MatchData "real">
#
# For a subexpression with an anchor,
# matching must begin at the matched anchor.
#
# ==== Boundary Anchors
#
# Each of these anchors matches a boundary:
#
# - <tt>^</tt>: Matches the beginning of a line:
#
#     /^bar/.match("foo\nbar") # => #<MatchData "bar">
#     /^ar/.match("foo\nbar")  # => nil
#
# - <tt>$</tt>: Matches the end of a line:
#
#     /bar$/.match("foo\nbar") # => #<MatchData "bar">
#     /ba$/.match("foo\nbar")  # => nil
#
# - <tt>\A</tt>: Matches the beginning of the string:
#
#     /\Afoo/.match('foo bar')  # => #<MatchData "foo">
#     /\Afoo/.match(' foo bar') # => nil
#
# - <tt>\Z</tt>: Matches the end of the string;
#   if string ends with a single newline,
#   it matches just before the ending newline:
#
#     /foo\Z/.match('bar foo')     # => #<MatchData "foo">
#     /foo\Z/.match('foo bar')     # => nil
#     /foo\Z/.match("bar foo\n")   # => #<MatchData "foo">
#     /foo\Z/.match("bar foo\n\n") # => nil
#
# - <tt>\z</tt>: Matches the end of the string:
#
#     /foo\z/.match('bar foo')   # => #<MatchData "foo">
#     /foo\z/.match('foo bar')   # => nil
#     /foo\z/.match("bar foo\n") # => nil
#
# - <tt>\b</tt>: Matches word boundary when not inside brackets;
#   matches backspace (<tt>"0x08"</tt>) when inside brackets:
#
#     /foo\b/.match('foo bar') # => #<MatchData "foo">
#     /foo\b/.match('foobar')  # => nil
#
# - <tt>\B</tt>: Matches non-word boundary:
#
#     /foo\B/.match('foobar')  # => #<MatchData "foo">
#     /foo\B/.match('foo bar') # => nil
#
# - <tt>\G</tt>: Matches first matching position:
#
#   In methods like String#gsub and String#scan, it changes on each iteration.
#   It initially matches the beginning of subject, and in each following iteration it matches where the last match finished.
#
#     "    a b c".gsub(/ /, '_')   # => "____a_b_c"
#     "    a b c".gsub(/\G /, '_') # => "____a b c"
#
#   In methods like Regexp#match and String#match
#   that take an optional offset, it matches where the search begins.
#
#     "hello, world".match(/,/, 3)   # => #<MatchData ",">
#     "hello, world".match(/\G,/, 3) # => nil
#
# ==== Lookaround Anchors
#
# Lookahead anchors:
#
# - <tt>(?=_pat_)</tt>: Positive lookahead assertion:
#   ensures that the following characters match _pat_,
#   but doesn't include those characters in the matched substring.
#
# - <tt>(?!_pat_)</tt>: Negative lookahead assertion:
#   ensures that the following characters <i>do not</i> match _pat_,
#   but doesn't include those characters in the matched substring.
#
# Lookbehind anchors:
#
# - <tt>(?<=_pat_)</tt>: Positive lookbehind assertion:
#   ensures that the preceding characters match _pat_, but
#   doesn't include those characters in the matched substring.
#
# - <tt>(?<!_pat_)</tt>: Negative lookbehind assertion:
#   ensures that the preceding characters do not match
#   _pat_, but doesn't include those characters in the matched substring.
#
# The pattern below uses positive lookahead and positive lookbehind to match
# text appearing in <tt><b></tt>...<tt></b></tt> tags
# without including the tags in the match:
#
#   /(?<=<b>)\w+(?=<\/b>)/.match("Fortune favors the <b>bold</b>.")
#   # => #<MatchData "bold">
#
# ==== Match-Reset Anchor
#
# - <tt>\K</tt>: Match reset:
#   the matched content preceding <tt>\K</tt> in the regexp is excluded from the result.
#   For example, the following two regexps are almost equivalent:
#
#     /ab\Kc/.match('abc')    # => #<MatchData "c">
#     /(?<=ab)c/.match('abc') # => #<MatchData "c">
#
#   These match the same string and <tt>$&</tt> equals <tt>'c'</tt>,
#   while the matched position is different.
#
#   As are the following two regexps:
#
#     /(a)\K(b)\Kc/
#     /(?<=(?<=(a))(b))c/
#
# === Alternation
#
# The vertical bar metacharacter (<tt>|</tt>) may be used within parentheses
# to express alternation:
# two or more subexpressions any of which may match the target string.
#
# Two alternatives:
#
#   re = /(a|b)/
#   re.match('foo') # => nil
#   re.match('bar') # => #<MatchData "b" 1:"b">
#
# Four alternatives:
#
#   re = /(a|b|c|d)/
#   re.match('shazam') # => #<MatchData "a" 1:"a">
#   re.match('cold')   # => #<MatchData "c" 1:"c">
#
# Each alternative is a subexpression, and may be composed of other subexpressions:
#
#   re = /([a-c]|[x-z])/
#   re.match('bar') # => #<MatchData "b" 1:"b">
#   re.match('ooz') # => #<MatchData "z" 1:"z">
#
# \Method Regexp.union provides a convenient way to construct
# a regexp with alternatives.
#
# === Quantifiers
#
# A simple regexp matches one character:
#
#   /\w/.match('Hello')  # => #<MatchData "H">
#
# An added _quantifier_ specifies how many matches are required or allowed:
#
# - <tt>*</tt> - Matches zero or more times:
#
#     /\w*/.match('')
#     # => #<MatchData "">
#     /\w*/.match('x')
#     # => #<MatchData "x">
#     /\w*/.match('xyz')
#     # => #<MatchData "xyz">
#
# - <tt>+</tt> - Matches one or more times:
#
#     /\w+/.match('')    # => nil
#     /\w+/.match('x')   # => #<MatchData "x">
#     /\w+/.match('xyz') # => #<MatchData "xyz">
#
# - <tt>?</tt> - Matches zero or one times:
#
#     /\w?/.match('')    # => #<MatchData "">
#     /\w?/.match('x')   # => #<MatchData "x">
#     /\w?/.match('xyz') # => #<MatchData "x">
#
# - <tt>{</tt>_n_<tt>}</tt> - Matches exactly _n_ times:
#
#     /\w{2}/.match('')    # => nil
#     /\w{2}/.match('x')   # => nil
#     /\w{2}/.match('xyz') # => #<MatchData "xy">
#
# - <tt>{</tt>_min_<tt>,}</tt> - Matches _min_ or more times:
#
#     /\w{2,}/.match('')    # => nil
#     /\w{2,}/.match('x')   # => nil
#     /\w{2,}/.match('xy')  # => #<MatchData "xy">
#     /\w{2,}/.match('xyz') # => #<MatchData "xyz">
#
# - <tt>{,</tt>_max_<tt>}</tt> - Matches _max_ or fewer times:
#
#     /\w{,2}/.match('')    # => #<MatchData "">
#     /\w{,2}/.match('x')   # => #<MatchData "x">
#     /\w{,2}/.match('xyz') # => #<MatchData "xy">
#
# - <tt>{</tt>_min_<tt>,</tt>_max_<tt>}</tt> -
#   Matches at least _min_ times and at most _max_ times:
#
#     /\w{1,2}/.match('')    # => nil
#     /\w{1,2}/.match('x')   # => #<MatchData "x">
#     /\w{1,2}/.match('xyz') # => #<MatchData "xy">
#
# ==== Greedy, Lazy, or Possessive Matching
#
# Quantifier matching may be greedy, lazy, or possessive:
#
# - In _greedy_ matching, as many occurrences as possible are matched
#   while still allowing the overall match to succeed.
#   Greedy quantifiers: <tt>*</tt>, <tt>+</tt>, <tt>?</tt>,
#   <tt>{min, max}</tt> and its variants.
# - In _lazy_ matching, the minimum number of occurrences are matched.
#   Lazy quantifiers: <tt>*?</tt>, <tt>+?</tt>, <tt>??</tt>,
#   <tt>{min, max}?</tt> and its variants.
# - In _possessive_ matching, once a match is found, there is no backtracking;
#   that match is retained, even if it jeopardises the overall match.
#   Possessive quantifiers: <tt>*+</tt>, <tt>++</tt>, <tt>?+</tt>.
#   Note that <tt>{min, max}</tt> and its variants do _not_ support possessive matching.
#
# More:
#
# - About greedy and lazy matching, see
#   {Choosing Minimal or Maximal Repetition}[https://doc.lagout.org/programmation/Regular%20Expressions/Regular%20Expressions%20Cookbook_%20Detailed%20Solutions%20in%20Eight%20Programming%20Languages%20%282nd%20ed.%29%20%5BGoyvaerts%20%26%20Levithan%202012-09-06%5D.pdf#tutorial-backtrack].
# - About possessive matching, see
#   {Eliminate Needless Backtracking}[https://doc.lagout.org/programmation/Regular%20Expressions/Regular%20Expressions%20Cookbook_%20Detailed%20Solutions%20in%20Eight%20Programming%20Languages%20%282nd%20ed.%29%20%5BGoyvaerts%20%26%20Levithan%202012-09-06%5D.pdf#tutorial-backtrack].
#
# === Groups and Captures
#
# A simple regexp has (at most) one match:
#
#   re = /\d\d\d\d-\d\d-\d\d/
#   re.match('1943-02-04')      # => #<MatchData "1943-02-04">
#   re.match('1943-02-04').size # => 1
#   re.match('foo')             # => nil
#
# Adding one or more pairs of parentheses, <tt>(_subexpression_)</tt>,
# defines _groups_, which may result in multiple matched substrings,
# called _captures_:
#
#   re = /(\d\d\d\d)-(\d\d)-(\d\d)/
#   re.match('1943-02-04')      # => #<MatchData "1943-02-04" 1:"1943" 2:"02" 3:"04">
#   re.match('1943-02-04').size # => 4
#
# The first capture is the entire matched string;
# the other captures are the matched substrings from the groups.
#
# A group may have a {quantifier}[rdoc-ref:Regexp@Quantifiers]:
#
#   re = /July 4(th)?/
#   re.match('July 4')   # => #<MatchData "July 4" 1:nil>
#   re.match('July 4th') # => #<MatchData "July 4th" 1:"th">
#
#   re = /(foo)*/
#   re.match('')       # => #<MatchData "" 1:nil>
#   re.match('foo')    # => #<MatchData "foo" 1:"foo">
#   re.match('foofoo') # => #<MatchData "foofoo" 1:"foo">
#
#   re = /(foo)+/
#   re.match('')       # => nil
#   re.match('foo')    # => #<MatchData "foo" 1:"foo">
#   re.match('foofoo') # => #<MatchData "foofoo" 1:"foo">
#
# The returned \MatchData object gives access to the matched substrings:
#
#   re = /(\d\d\d\d)-(\d\d)-(\d\d)/
#   md = re.match('1943-02-04')
#   # => #<MatchData "1943-02-04" 1:"1943" 2:"02" 3:"04">
#   md[0] # => "1943-02-04"
#   md[1] # => "1943"
#   md[2] # => "02"
#   md[3] # => "04"
#
# ==== Non-Capturing Groups
#
# A group may be made non-capturing;
# it is still a group (and, for example, can have a quantifier),
# but its matching substring is not included among the captures.
#
# A non-capturing group begins with <tt>?:</tt> (inside the parentheses):
#
#   # Don't capture the year.
#   re = /(?:\d\d\d\d)-(\d\d)-(\d\d)/
#   md = re.match('1943-02-04') # => #<MatchData "1943-02-04" 1:"02" 2:"04">
#
# ==== Backreferences
#
# A group match may also be referenced within the regexp itself;
# such a reference is called a +backreference+:
#
#   /[csh](..) [csh]\1 in/.match('The cat sat in the hat')
#   # => #<MatchData "cat sat in" 1:"at">
#
# This table shows how each subexpression in the regexp above
# matches a substring in the target string:
#
#   | Subexpression in Regexp   | Matching Substring in Target String |
#   |---------------------------|-------------------------------------|
#   |       First '[csh]'       |            Character 'c'            |
#   |          '(..)'           |        First substring 'at'         |
#   |      First space ' '      |      First space character ' '      |
#   |       Second '[csh]'      |            Character 's'            |
#   | '\1' (backreference 'at') |        Second substring 'at'        |
#   |           ' in'           |            Substring ' in'          |
#
# A regexp may contain any number of groups:
#
# - For a large number of groups:
#
#   - The ordinary <tt>\\_n_</tt> notation applies only for _n_ in range (1..9).
#   - The <tt>MatchData[_n_]</tt> notation applies for any non-negative _n_.
#
# - <tt>\0</tt> is a special backreference, referring to the entire matched string;
#   it may not be used within the regexp itself,
#   but may be used outside it (for example, in a substitution method call):
#
#     'The cat sat in the hat'.gsub(/[csh]at/, '\0s')
#     # => "The cats sats in the hats"
#
# ==== Named Captures
#
# As seen above, a capture can be referred to by its number.
# A capture can also have a name,
# prefixed as <tt>?<_name_></tt> or <tt>?'_name_'</tt>,
# and the name (symbolized) may be used as an index in <tt>MatchData[]</tt>:
#
#   md = /\$(?<dollars>\d+)\.(?'cents'\d+)/.match("$3.67")
#   # => #<MatchData "$3.67" dollars:"3" cents:"67">
#   md[:dollars]  # => "3"
#   md[:cents]    # => "67"
#   # The capture numbers are still valid.
#   md[2]         # => "67"
#
# When a regexp contains a named capture, there are no unnamed captures:
#
#   /\$(?<dollars>\d+)\.(\d+)/.match("$3.67")
#   # => #<MatchData "$3.67" dollars:"3">
#
# A named group may be backreferenced as <tt>\k<_name_></tt>:
#
#   /(?<vowel>[aeiou]).\k<vowel>.\k<vowel>/.match('ototomy')
#   # => #<MatchData "ototo" vowel:"o">
#
# When (and only when) a regexp contains named capture groups
# and appears before the <tt>=~</tt> operator,
# the captured substrings are assigned to local variables with corresponding names:
#
#   /\$(?<dollars>\d+)\.(?<cents>\d+)/ =~ '$3.67'
#   dollars # => "3"
#   cents   # => "67"
#
# \Method MatchData#named_captures returns a hash of the capture names and substrings;
# method Regexp#names returns an array of the capture names.
#
# ==== Atomic Grouping
#
# A group may be made _atomic_ with <tt>(?></tt>_subexpression_<tt>)</tt>.
#
# This causes the subexpression to be matched
# independently of the rest of the expression,
# so that the matched substring becomes fixed for the remainder of the match,
# unless the entire subexpression must be abandoned and subsequently revisited.
#
# In this way _subexpression_ is treated as a non-divisible whole.
# Atomic grouping is typically used to optimise patterns
# to prevent needless backtracking.
#
# Example (without atomic grouping):
#
#   /".*"/.match('"Quote"') # => #<MatchData "\"Quote\"">
#
# Analysis:
#
# 1. The leading subexpression <tt>"</tt> in the pattern matches the first character
#    <tt>"</tt> in the target string.
# 2. The next subexpression <tt>.*</tt> matches the next substring <tt>Quote"</tt>
#    (including the trailing double-quote).
# 3. Now there is nothing left in the target string to match
#    the trailing subexpression <tt>"</tt> in the pattern;
#    this would cause the overall match to fail.
# 4. The matched substring is backtracked by one position: <tt>Quote</tt>.
# 5. The final subexpression <tt>"</tt> now matches the final substring <tt>"</tt>,
#    and the overall match succeeds.
#
# If subexpression <tt>.*</tt> is grouped atomically,
# the backtracking is disabled, and the overall match fails:
#
#   /"(?>.*)"/.match('"Quote"') # => nil
#
# Atomic grouping can affect performance;
# see {Atomic Group}[https://www.regular-expressions.info/atomic.html].
#
# ==== Subexpression Calls
#
# As seen above, a backreference number (<tt>\\_n_</tt>) or name (<tt>\k<_name_></tt>)
# gives access to a captured _substring_;
# the corresponding regexp _subexpression_ may also be accessed,
# via the number (<tt>\\g<i>n</i></tt>) or name (<tt>\g<_name_></tt>):
#
#   /\A(?<paren>\(\g<paren>*\))*\z/.match('(())')
#   # ^1
#   #      ^2
#   #           ^3
#   #                 ^4
#   #      ^5
#   #           ^6
#   #                      ^7
#   #                       ^8
#   #                       ^9
#   #                           ^10
#
# The pattern:
#
# 1.  Matches at the beginning of the string, i.e. before the first character.
# 2.  Enters a named group +paren+.
# 3.  Matches the first character in the string, <tt>'('</tt>.
# 4.  Calls the +paren+ group again, i.e. recurses back to the second step.
# 5.  Re-enters the +paren+ group.
# 6.  Matches the second character in the string, <tt>'('</tt>.
# 7.  Attempts to call +paren+ a third time,
#     but fails because doing so would prevent an overall successful match.
# 8.  Matches the third character in the string, <tt>')'</tt>;
#     marks the end of the second recursive call.
# 9.  Matches the fourth character in the string, <tt>')'</tt>.
# 10. Matches the end of the string.
#
# See {Subexpression calls}[https://learnbyexample.github.io/Ruby_Regexp/groupings-and-backreferences.html?highlight=subexpression#subexpression-calls].
#
# ==== Conditionals
#
# The conditional construct takes the form <tt>(?(_cond_)_yes_|_no_)</tt>, where:
#
# - _cond_ may be a capture number or name.
# - The match to be applied is _yes_ if _cond_ is captured;
#   otherwise the match to be applied is _no_.
# - If not needed, <tt>|_no_</tt> may be omitted.
#
# Examples:
#
#   re = /\A(foo)?(?(1)(T)|(F))\z/
#   re.match('fooT') # => #<MatchData "fooT" 1:"foo" 2:"T" 3:nil>
#   re.match('F')    # => #<MatchData "F" 1:nil 2:nil 3:"F">
#   re.match('fooF') # => nil
#   re.match('T')    # => nil
#
#   re = /\A(?<xyzzy>foo)?(?(<xyzzy>)(T)|(F))\z/
#   re.match('fooT') # => #<MatchData "fooT" xyzzy:"foo">
#   re.match('F')    # => #<MatchData "F" xyzzy:nil>
#   re.match('fooF') # => nil
#   re.match('T')    # => nil
#
#
# ==== Absence Operator
#
# The absence operator is a special group that matches anything which does _not_ match the contained subexpressions.
#
#   /(?~real)/.match('surrealist') # => #<MatchData "surrea">
#   /(?~real)ist/.match('surrealist') # => #<MatchData "ealist">
#   /sur(?~real)ist/.match('surrealist') # => nil
#
# === Unicode
#
# ==== Unicode Properties
#
# The <tt>/\p{_property_name_}/</tt> construct (with lowercase +p+)
# matches characters using a Unicode property name,
# much like a character class;
# property +Alpha+ specifies alphabetic characters:
#
#   /\p{Alpha}/.match('a') # => #<MatchData "a">
#   /\p{Alpha}/.match('1') # => nil
#
# A property can be inverted
# by prefixing the name with a caret character (<tt>^</tt>):
#
#   /\p{^Alpha}/.match('1') # => #<MatchData "1">
#   /\p{^Alpha}/.match('a') # => nil
#
# Or by using <tt>\P</tt> (uppercase +P+):
#
#   /\P{Alpha}/.match('1') # => #<MatchData "1">
#   /\P{Alpha}/.match('a') # => nil
#
# See {Unicode Properties}[rdoc-ref:regexp/unicode_properties.rdoc]
# for regexps based on the numerous properties.
#
# Some commonly-used properties correspond to POSIX bracket expressions:
#
# - <tt>/\p{Alnum}/</tt>: Alphabetic and numeric character
# - <tt>/\p{Alpha}/</tt>: Alphabetic character
# - <tt>/\p{Blank}/</tt>: Space or tab
# - <tt>/\p{Cntrl}/</tt>: Control character
# - <tt>/\p{Digit}/</tt>: Digit
# - <tt>/\p{Lower}/</tt>: Lowercase alphabetical character
# - <tt>/\p{Print}/</tt>: Like <tt>\p{Graph}</tt>, but includes the space character
# - <tt>/\p{Punct}/</tt>: Punctuation character
# - <tt>/\p{Space}/</tt>: Whitespace character (<tt>[:blank:]</tt>, newline,
#   carriage return, etc.)
# - <tt>/\p{Upper}/</tt>: Uppercase alphabetical character
# - <tt>/\p{XDigit}/</tt>: Digit allowed in a hexadecimal number (i.e., 0-9a-fA-F)
#
# These are also commonly used:
#
# - <tt>/\p{Emoji}/</tt>: Unicode emoji.
# - <tt>/\p{Graph}/</tt>: Non-blank character
#   (excludes spaces, control characters, and similar).
# - <tt>/\p{Word}/</tt>: A member in one of these Unicode character
#   categories (see below) or having one of these Unicode properties:
#
#   - Unicode categories:
#     - +Mark+ (+M+).
#     - <tt>Decimal Number</tt> (+Nd+).
#     - <tt>Connector Punctuation</tt> (+Pc+).
#
#   - Unicode properties:
#     - +Alpha+
#     - <tt>Join_Control</tt>
#
# - <tt>/\p{ASCII}/</tt>: A character in the ASCII character set.
# - <tt>/\p{Any}/</tt>: Any Unicode character (including unassigned characters).
# - <tt>/\p{Assigned}/</tt>: An assigned character.
#
# ==== Unicode Character Categories
#
# A Unicode character category name:
#
# - May be either its full name or its abbreviated name.
# - Is case-insensitive.
# - Treats a space, a hyphen, and an underscore as equivalent.
#
# Examples:
#
#   /\p{lu}/                # => /\p{lu}/
#   /\p{LU}/                # => /\p{LU}/
#   /\p{Uppercase Letter}/  # => /\p{Uppercase Letter}/
#   /\p{Uppercase_Letter}/  # => /\p{Uppercase_Letter}/
#   /\p{UPPERCASE-LETTER}/  # => /\p{UPPERCASE-LETTER}/
#
# Below are the Unicode character category abbreviations and names.
# Enumerations of characters in each category are at the links.
#
# Letters:
#
# - +L+, +Letter+: +LC+, +Lm+, or +Lo+.
# - +LC+, +Cased_Letter+: +Ll+, +Lt+, or +Lu+.
# - {Ll, Lowercase_Letter}[https://www.compart.com/en/unicode/category/Ll].
# - {Lm, Modifier_Letter}[https://www.compart.com/en/unicode/category/Lm].
# - {Lo, Other_Letter}[https://www.compart.com/en/unicode/category/Lo].
# - {Lt, Titlecase_Letter}[https://www.compart.com/en/unicode/category/Lt].
# - {Lu, Uppercase_Letter}[https://www.compart.com/en/unicode/category/Lu].
#
# Marks:
#
# - +M+, +Mark+: +Mc+, +Me+, or +Mn+.
# - {Mc, Spacing_Mark}[https://www.compart.com/en/unicode/category/Mc].
# - {Me, Enclosing_Mark}[https://www.compart.com/en/unicode/category/Me].
# - {Mn, Nonspacing_Mark}[https://www.compart.com/en/unicode/category/Mn].
#
# Numbers:
#
# - +N+, +Number+: +Nd+, +Nl+, or +No+.
# - {Nd, Decimal_Number}[https://www.compart.com/en/unicode/category/Nd].
# - {Nl, Letter_Number}[https://www.compart.com/en/unicode/category/Nl].
# - {No, Other_Number}[https://www.compart.com/en/unicode/category/No].
#
# Punctuation:
#
# - +P+, +Punctuation+: +Pc+, +Pd+, +Pe+, +Pf+, +Pi+, +Po+, or +Ps+.
# - {Pc, Connector_Punctuation}[https://www.compart.com/en/unicode/category/Pc].
# - {Pd, Dash_Punctuation}[https://www.compart.com/en/unicode/category/Pd].
# - {Pe, Close_Punctuation}[https://www.compart.com/en/unicode/category/Pe].
# - {Pf, Final_Punctuation}[https://www.compart.com/en/unicode/category/Pf].
# - {Pi, Initial_Punctuation}[https://www.compart.com/en/unicode/category/Pi].
# - {Po, Other_Punctuation}[https://www.compart.com/en/unicode/category/Po].
# - {Ps, Open_Punctuation}[https://www.compart.com/en/unicode/category/Ps].
#
# Symbols:
#
# - +S+, +Symbol+: +Sc+, +Sk+, +Sm+, or +So+.
# - {Sc, Currency_Symbol}[https://www.compart.com/en/unicode/category/Sc].
# - {Sk, Modifier_Symbol}[https://www.compart.com/en/unicode/category/Sk].
# - {Sm, Math_Symbol}[https://www.compart.com/en/unicode/category/Sm].
# - {So, Other_Symbol}[https://www.compart.com/en/unicode/category/So].
#
# Separators:
#
# - +Z+, +Separator+: +Zl+, +Zp+, or +Zs+.
# - {Zl, Line_Separator}[https://www.compart.com/en/unicode/category/Zl].
# - {Zp, Paragraph_Separator}[https://www.compart.com/en/unicode/category/Zp].
# - {Zs, Space_Separator}[https://www.compart.com/en/unicode/category/Zs].
#
# Other:
#
# - +C+, +Other+: +Cc+, +Cf+, +Cn+, +Co+, or +Cs+.
# - {Cc, Control}[https://www.compart.com/en/unicode/category/Cc].
# - {Cf, Format}[https://www.compart.com/en/unicode/category/Cf].
# - {Cn, Unassigned}[https://www.compart.com/en/unicode/category/Cn].
# - {Co, Private_Use}[https://www.compart.com/en/unicode/category/Co].
# - {Cs, Surrogate}[https://www.compart.com/en/unicode/category/Cs].
#
# ==== Unicode Scripts and Blocks
#
# Among the Unicode properties are:
#
# - {Unicode scripts}[https://en.wikipedia.org/wiki/Script_(Unicode)];
#   see {supported scripts}[https://www.unicode.org/standard/supported.html].
# - {Unicode blocks}[https://en.wikipedia.org/wiki/Unicode_block];
#   see {supported blocks}[http://www.unicode.org/Public/UNIDATA/Blocks.txt].
#
# === POSIX Bracket Expressions
#
# A POSIX <i>bracket expression</i> is also similar to a character class.
# These expressions provide a portable alternative to the above,
# with the added benefit of encompassing non-ASCII characters:
#
# - <tt>/\d/</tt> matches only ASCII decimal digits +0+ through +9+.
# - <tt>/[[:digit:]]/</tt> matches any character in the Unicode
#   <tt>Decimal Number</tt> (+Nd+) category;
#   see below.
#
# The POSIX bracket expressions:
#
# - <tt>/[[:digit:]]/</tt>: Matches a {Unicode digit}[https://www.compart.com/en/unicode/category/Nd]:
#
#     /[[:digit:]]/.match('9')       # => #<MatchData "9">
#     /[[:digit:]]/.match("\u{1fbf9}") # => #<MatchData "🯹">
#
# - <tt>/[[:xdigit:]]/</tt>: Matches a digit allowed in a hexadecimal number;
#   equivalent to <tt>[0-9a-fA-F]</tt>.
#
# - <tt>/[[:upper:]]/</tt>: Matches a {Unicode uppercase letter}[https://www.compart.com/en/unicode/category/Lu]:
#
#     /[[:upper:]]/.match('A')      # => #<MatchData "A">
#     /[[:upper:]]/.match("\u00c6") # => #<MatchData "Æ">
#
# - <tt>/[[:lower:]]/</tt>: Matches a {Unicode lowercase letter}[https://www.compart.com/en/unicode/category/Ll]:
#
#     /[[:lower:]]/.match('a')      # => #<MatchData "a">
#     /[[:lower:]]/.match("\u01fd") # => #<MatchData "ǽ">
#
# - <tt>/[[:alpha:]]/</tt>: Matches <tt>/[[:upper:]]/</tt> or <tt>/[[:lower:]]/</tt>.
#
# - <tt>/[[:alnum:]]/</tt>: Matches <tt>/[[:alpha:]]/</tt> or <tt>/[[:digit:]]/</tt>.
#
# - <tt>/[[:space:]]/</tt>: Matches a {Unicode space character}[https://www.compart.com/en/unicode/category/Zs]:
#
#     /[[:space:]]/.match(' ')      # => #<MatchData " ">
#     /[[:space:]]/.match("\u2005") # => #<MatchData " ">
#
# - <tt>/[[:blank:]]/</tt>: Matches <tt>/[[:space:]]/</tt> or tab character:
#
#     /[[:blank:]]/.match(' ')      # => #<MatchData " ">
#     /[[:blank:]]/.match("\u2005") # => #<MatchData " ">
#     /[[:blank:]]/.match("\t")     # => #<MatchData "\t">
#
# - <tt>/[[:cntrl:]]/</tt>: Matches a {Unicode control character}[https://www.compart.com/en/unicode/category/Cc]:
#
#     /[[:cntrl:]]/.match("\u0000") # => #<MatchData "\u0000">
#     /[[:cntrl:]]/.match("\u009f") # => #<MatchData "\u009F">
#
# - <tt>/[[:graph:]]/</tt>: Matches any character
#   except <tt>/[[:space:]]/</tt> or <tt>/[[:cntrl:]]/</tt>.
#
# - <tt>/[[:print:]]/</tt>: Matches <tt>/[[:graph:]]/</tt> or space character.
#
# - <tt>/[[:punct:]]/</tt>: Matches any {Unicode punctuation character}[https://www.compart.com/en/unicode/category/Po].
#
# Ruby also supports these (non-POSIX) bracket expressions:
#
# - <tt>/[[:ascii:]]/</tt>: Matches a character in the ASCII character set.
# - <tt>/[[:word:]]/</tt>: Matches a character in one of these Unicode character
#   categories or having one of these Unicode properties:
#
#   - Unicode categories:
#     - +Mark+ (+M+).
#     - <tt>Decimal Number</tt> (+Nd+)
#     - <tt>Connector Punctuation</tt> (+Pc+).
#
#   - Unicode properties:
#     - +Alpha+
#     - <tt>Join_Control</tt>
#
# === Comments
#
# A comment may be included in a regexp pattern
# using the <tt>(?#</tt>_comment_<tt>)</tt> construct,
# where _comment_ is arbitrary text that is ignored by the regexp engine:
#
#   /foo(?#Ignore me)bar/.match('foobar') # => #<MatchData "foobar">
#
# The comment may not include an unescaped terminator character.
#
# See also {Extended Mode}[rdoc-ref:Regexp@Extended+Mode].
#
# == Modes
#
# Each of these modifiers sets a mode for the regexp:
#
# - +i+: <tt>/_pattern_/i</tt> sets
#   {Case-Insensitive Mode}[rdoc-ref:Regexp@Case-Insensitive+Mode].
# - +m+: <tt>/_pattern_/m</tt> sets
#   {Multiline Mode}[rdoc-ref:Regexp@Multiline+Mode].
# - +x+: <tt>/_pattern_/x</tt> sets
#   {Extended Mode}[rdoc-ref:Regexp@Extended+Mode].
# - +o+: <tt>/_pattern_/o</tt> sets
#   {Interpolation Mode}[rdoc-ref:Regexp@Interpolation+Mode].
#
# Any, all, or none of these may be applied.
#
# Modifiers +i+, +m+, and +x+ may be applied to subexpressions:
#
# - <tt>(?_modifier_)</tt> turns the mode "on" for ensuing subexpressions
# - <tt>(?-_modifier_)</tt> turns the mode "off" for ensuing subexpressions
# - <tt>(?_modifier_:_subexp_)</tt> turns the mode "on" for _subexp_ within the group
# - <tt>(?-_modifier_:_subexp_)</tt> turns the mode "off" for _subexp_ within the group
#
# Example:
#
#   re = /(?i)te(?-i)st/
#   re.match('test') # => #<MatchData "test">
#   re.match('TEst') # => #<MatchData "TEst">
#   re.match('TEST') # => nil
#   re.match('teST') # => nil
#
#   re = /t(?i:e)st/
#   re.match('test') # => #<MatchData "test">
#   re.match('tEst') # => #<MatchData "tEst">
#   re.match('tEST') # => nil
#
# \Method Regexp#options returns an integer whose value shows
# the settings for case-insensitivity mode, multiline mode, and extended mode.
#
# === Case-Insensitive Mode
#
# By default, a regexp is case-sensitive:
#
#   /foo/.match('FOO')  # => nil
#
# Modifier +i+ enables case-insensitive mode:
#
#   /foo/i.match('FOO')
#   # => #<MatchData "FOO">
#
# \Method Regexp#casefold? returns whether the mode is case-insensitive.
#
# === Multiline Mode
#
# The multiline-mode in Ruby is what is commonly called a "dot-all mode":
#
# - Without the +m+ modifier, the subexpression <tt>.</tt> does not match newlines:
#
#     /a.c/.match("a\nc")  # => nil
#
# - With the modifier, it does match:
#
#     /a.c/m.match("a\nc") # => #<MatchData "a\nc">
#
# Unlike other languages, the modifier +m+ does not affect the anchors <tt>^</tt> and <tt>$</tt>.
# These anchors always match at line-boundaries in Ruby.
#
# === Extended Mode
#
# Modifier +x+ enables extended mode, which means that:
#
# - Literal white space in the pattern is to be ignored.
# - Character <tt>#</tt> marks the remainder of its containing line as a comment,
#   which is also to be ignored for matching purposes.
#
# In extended mode, whitespace and comments may be used
# to form a self-documented regexp.
#
# Regexp not in extended mode (matches some Roman numerals):
#
#   pattern = '^M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$'
#   re = /#{pattern}/
#   re.match('MCMXLIII') # => #<MatchData "MCMXLIII" 1:"CM" 2:"XL" 3:"III">
#
# Regexp in extended mode:
#
#   pattern = <<-EOT
#     ^                   # beginning of string
#     M{0,3}              # thousands - 0 to 3 Ms
#     (CM|CD|D?C{0,3})    # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 Cs),
#                         #            or 500-800 (D, followed by 0 to 3 Cs)
#     (XC|XL|L?X{0,3})    # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 Xs),
#                         #        or 50-80 (L, followed by 0 to 3 Xs)
#     (IX|IV|V?I{0,3})    # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 Is),
#                         #        or 5-8 (V, followed by 0 to 3 Is)
#     $                   # end of string
#   EOT
#   re = /#{pattern}/x
#   re.match('MCMXLIII') # => #<MatchData "MCMXLIII" 1:"CM" 2:"XL" 3:"III">
#
# === Interpolation Mode
#
# Modifier +o+ means that the first time a literal regexp with interpolations
# is encountered,
# the generated Regexp object is saved and used for all future evaluations
# of that literal regexp.
# Without modifier +o+, the generated Regexp is not saved,
# so each evaluation of the literal regexp generates a new Regexp object.
#
# Without modifier +o+:
#
#   def letters; sleep 5; /[A-Z][a-z]/; end
#   words = %w[abc def xyz]
#   start = Time.now
#   words.each {|word| word.match(/\A[#{letters}]+\z/) }
#   Time.now - start # => 15.0174892
#
# With modifier +o+:
#
#   start = Time.now
#   words.each {|word| word.match(/\A[#{letters}]+\z/o) }
#   Time.now - start # => 5.0010866
#
# Note that if the literal regexp does not have interpolations,
# the +o+ behavior is the default.
#
# == Encodings
#
# By default, a regexp with only US-ASCII characters has US-ASCII encoding:
#
#   re = /foo/
#   re.source.encoding # => #<Encoding:US-ASCII>
#   re.encoding        # => #<Encoding:US-ASCII>
#
# A regular expression containing non-US-ASCII characters
# is assumed to use the source encoding.
# This can be overridden with one of the following modifiers.
#
# - <tt>/_pat_/n</tt>: US-ASCII if only containing US-ASCII characters,
#   otherwise ASCII-8BIT:
#
#     /foo/n.encoding     # => #<Encoding:US-ASCII>
#     /foo\xff/n.encoding # => #<Encoding:ASCII-8BIT>
#     /foo\x7f/n.encoding # => #<Encoding:US-ASCII>
#
# - <tt>/_pat_/u</tt>: UTF-8
#
#     /foo/u.encoding # => #<Encoding:UTF-8>
#
# - <tt>/_pat_/e</tt>: EUC-JP
#
#     /foo/e.encoding # => #<Encoding:EUC-JP>
#
# - <tt>/_pat_/s</tt>: Windows-31J
#
#     /foo/s.encoding # => #<Encoding:Windows-31J>
#
# A regexp can be matched against a target string when either:
#
# - They have the same encoding.
# - The regexp's encoding is a fixed encoding and the string
#   contains only ASCII characters.
#   Method Regexp#fixed_encoding? returns whether the regexp
#   has a <i>fixed</i> encoding.
#
# If a match between incompatible encodings is attempted, an
# <tt>Encoding::CompatibilityError</tt> exception is raised.
#
# Example:
#
#   re = eval("# encoding: ISO-8859-1\n/foo\\xff?/")
#   re.encoding                 # => #<Encoding:ISO-8859-1>
#   re =~ "foo".encode("UTF-8") # => 0
#   re =~ "foo\u0100"           # Raises Encoding::CompatibilityError
#
# The encoding may be explicitly fixed by including Regexp::FIXEDENCODING
# in the second argument for Regexp.new:
#
#   # Regexp with encoding ISO-8859-1.
#   re = Regexp.new("a".force_encoding('iso-8859-1'), Regexp::FIXEDENCODING)
#   re.encoding  # => #<Encoding:ISO-8859-1>
#   # Target string with encoding UTF-8.
#   s = "a\u3042"
#   s.encoding   # => #<Encoding:UTF-8>
#   re.match(s)  # Raises Encoding::CompatibilityError.
#
# == Timeouts
#
# When either a regexp source or a target string comes from untrusted input,
# malicious values could become a denial-of-service attack;
# to prevent such an attack, it is wise to set a timeout.
#
# \Regexp has two timeout values:
#
# - A class default timeout, used for a regexp whose instance timeout is +nil+;
#   this default is initially +nil+, and may be set by method Regexp.timeout=:
#
#     Regexp.timeout # => nil
#     Regexp.timeout = 3.0
#     Regexp.timeout # => 3.0
#
# - An instance timeout, which defaults to +nil+ and may be set in Regexp.new:
#
#     re = Regexp.new('foo', timeout: 5.0)
#     re.timeout # => 5.0
#
# When regexp.timeout is +nil+, the timeout "falls through" to Regexp.timeout;
# when regexp.timeout is non-+nil+, that value controls timing out:
#
#   | regexp.timeout Value | Regexp.timeout Value |            Result           |
#   |----------------------|----------------------|-----------------------------|
#   |         nil          |          nil         |       Never times out.      |
#   |         nil          |         Float        | Times out in Float seconds. |
#   |        Float         |          Any         | Times out in Float seconds. |
#
# == Optimization
#
# For certain values of the pattern and target string,
# matching time can grow polynomially or exponentially in relation to the input size;
# the potential vulnerability arising from this is the {regular expression denial-of-service}[https://en.wikipedia.org/wiki/ReDoS] (ReDoS) attack.
#
# \Regexp matching can apply an optimization to prevent ReDoS attacks.
# When the optimization is applied, matching time increases linearly (not polynomially or exponentially)
# in relation to the input size, and a ReDoS attack is not possible.
#
# This optimization is applied if the pattern meets these criteria:
#
# - No backreferences.
# - No subexpression calls.
# - No nested lookaround anchors or atomic groups.
# - No nested quantifiers with counting (i.e. no nested <tt>{n}</tt>,
#   <tt>{min,}</tt>, <tt>{,max}</tt>, or <tt>{min,max}</tt> style quantifiers)
#
# You can use method Regexp.linear_time? to determine whether a pattern meets these criteria:
#
#   Regexp.linear_time?(/a*/)     # => true
#   Regexp.linear_time?('a*')     # => true
#   Regexp.linear_time?(/(a*)\1/) # => false
#
# However, an untrusted source may not be safe even if the method returns +true+,
# because the optimization uses memoization (which may invoke large memory consumption).
#
# == References
#
# Read (online PDF books):
#
# - {Mastering Regular Expressions}[https://ia902508.us.archive.org/10/items/allitebooks-02/Mastering%20Regular%20Expressions%2C%203rd%20Edition.pdf]
#   by Jeffrey E.F. Friedl.
# - {Regular Expressions Cookbook}[https://doc.lagout.org/programmation/Regular%20Expressions/Regular%20Expressions%20Cookbook_%20Detailed%20Solutions%20in%20Eight%20Programming%20Languages%20%282nd%20ed.%29%20%5BGoyvaerts%20%26%20Levithan%202012-09-06%5D.pdf]
#   by Jan Goyvaerts & Steven Levithan.
#
# Explore, test (interactive online editor):
#
# - {Rubular}[https://rubular.com/].
class Regexp
  # Option constant for extended mode; see Regexp#options and Regexp.new.
  EXTENDED = _
  # Option constant for a fixed encoding; see Regexp#options and Regexp.new.
  FIXEDENCODING = _
  # Option constant for case-insensitive mode; see Regexp#options and Regexp.new.
  IGNORECASE = _
  # Option constant for multiline mode; see Regexp#options and Regexp.new.
  MULTILINE = _
  # Option constant for no encoding; see Regexp#options and Regexp.new.
  NOENCODING = _

  # Alias for Regexp.new.
  def self.compile(*args) end

  # Returns a new string that escapes any characters
  # that have special meaning in a regular expression:
  #
  #   s = Regexp.escape('\*?{}.')      # => "\\\\\\*\\?\\{\\}\\."
  #
  # For any string +s+, this call returns a MatchData object:
  #
  #   r = Regexp.new(Regexp.escape(s)) # => /\\\\\\\*\\\?\\\{\\\}\\\./
  #   r.match(s)                       # => #<MatchData "\\\\\\*\\?\\{\\}\\.">
  def self.escape(string) end

  # With no argument, returns the value of <tt>$~</tt>,
  # which is the result of the most recent pattern match
  # (see {Regexp global variables}[rdoc-ref:Regexp@Global+Variables]):
  #
  #   /c(.)t/ =~ 'cat'  # => 0
  #   Regexp.last_match # => #<MatchData "cat" 1:"a">
  #   /a/ =~ 'foo'      # => nil
  #   Regexp.last_match # => nil
  #
  # With non-negative integer argument +n+, returns the _n_th field in the
  # matchdata, if any, or +nil+ if none:
  #
  #   /c(.)t/ =~ 'cat'     # => 0
  #   Regexp.last_match(0) # => "cat"
  #   Regexp.last_match(1) # => "a"
  #   Regexp.last_match(2) # => nil
  #
  # With negative integer argument +n+, counts backwards from the last field:
  #
  #   Regexp.last_match(-1) # => "a"
  #
  # With string or symbol argument +name+,
  # returns the string value for the named capture, if any:
  #
  #   /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ 'var = val'
  #   Regexp.last_match        # => #<MatchData "var = val" lhs:"var" rhs:"val">
  #   Regexp.last_match(:lhs)  # => "var"
  #   Regexp.last_match('rhs') # => "val"
  #   Regexp.last_match('foo') # Raises IndexError.
  def self.last_match(...) end

  # Returns +true+ if matching against <tt>re</tt> can be
  # done in linear time to the input string.
  #
  #   Regexp.linear_time?(/re/) # => true
  #
  # Note that this is a property of the ruby interpreter, not of the argument
  # regular expression.  An identical regexp may or may not run in linear time
  # depending on your ruby binary.  Neither forward nor backward compatibility
  # is guaranteed about the return value of this method.  Our current algorithm
  # is (*1) but this is subject to change in the future.  Alternative
  # implementations can also behave differently.  They might always return
  # false for everything.
  #
  # (*1): https://doi.org/10.1109/SP40001.2021.00032
  def self.linear_time?(...) end

  # Returns a new string that escapes any characters
  # that have special meaning in a regular expression:
  #
  #   s = Regexp.escape('\*?{}.')      # => "\\\\\\*\\?\\{\\}\\."
  #
  # For any string +s+, this call returns a MatchData object:
  #
  #   r = Regexp.new(Regexp.escape(s)) # => /\\\\\\\*\\\?\\\{\\\}\\\./
  #   r.match(s)                       # => #<MatchData "\\\\\\*\\?\\{\\}\\.">
  def self.quote(p1) end

  # Returns the current default timeout interval for Regexp matching, in seconds.
  # +nil+ means no default timeout is configured.
  def self.timeout; end

  # Sets the default timeout interval for Regexp matching, in seconds.
  # +nil+ means no default timeout is configured.
  # This configuration is process-global. If you want to set a timeout for
  # an individual Regexp, use the +timeout+ keyword for <code>Regexp.new</code>.
  #
  #    Regexp.timeout = 1
  #    /^a*b?a*$/ =~ "a" * 100000 + "x" #=> regexp match timeout (Regexp::TimeoutError)
  def self.timeout=(p1) end

  # Returns +object+ if it is a regexp:
  #
  #   Regexp.try_convert(/re/) # => /re/
  #
  # Otherwise if +object+ responds to <tt>:to_regexp</tt>,
  # calls <tt>object.to_regexp</tt> and returns the result.
  #
  # Returns +nil+ if +object+ does not respond to <tt>:to_regexp</tt>.
  #
  #   Regexp.try_convert('re') # => nil
  #
  # Raises an exception unless <tt>object.to_regexp</tt> returns a regexp.
  def self.try_convert(object) end

  # Returns a new regexp that is the union of the given patterns:
  #
  #   r = Regexp.union(%w[cat dog]) # => /cat|dog/
  #   r.match('cat')                # => #<MatchData "cat">
  #   r.match('dog')                # => #<MatchData "dog">
  #   r.match('cog')                # => nil
  #
  # For each pattern that is a string, its special characters are escaped
  # (as by Regexp.escape) before it is used:
  #
  #   Regexp.union('penzance')             # => /penzance/
  #   Regexp.union('a+b*c')                # => /a\+b\*c/
  #   Regexp.union('skiing', 'sledding')   # => /skiing|sledding/
  #   Regexp.union(['skiing', 'sledding']) # => /skiing|sledding/
  #
  # For each pattern that is a regexp, it is used as is,
  # including its flags:
  #
  #   Regexp.union(/foo/i, /bar/m, /baz/x)
  #   # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
  #   Regexp.union([/foo/i, /bar/m, /baz/x])
  #   # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
  #
  # With no arguments, returns <tt>/(?!)/</tt>:
  #
  #   Regexp.union # => /(?!)/
  #
  # If any regexp pattern contains captures, the behavior is unspecified.
  def self.union(...) end

  # With argument +string+ given, returns a new regexp with the given string
  # and options:
  #
  #   r = Regexp.new('foo') # => /foo/
  #   r.source              # => "foo"
  #   r.options             # => 0
  #
  # Optional argument +options+ is one of the following:
  #
  # - A String of options:
  #
  #     Regexp.new('foo', 'i')  # => /foo/i
  #     Regexp.new('foo', 'im') # => /foo/im
  #
  # - The bit-wise OR of one or more of the constants
  #   Regexp::EXTENDED, Regexp::IGNORECASE, Regexp::MULTILINE, and
  #   Regexp::NOENCODING:
  #
  #     Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
  #     Regexp.new('foo', Regexp::EXTENDED)   # => /foo/x
  #     Regexp.new('foo', Regexp::MULTILINE)  # => /foo/m
  #     Regexp.new('foo', Regexp::NOENCODING) # => /foo/n
  #     flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
  #     Regexp.new('foo', flags)              # => /foo/mix
  #
  # - +nil+ or +false+, which is ignored.
  # - Any other truthy value, in which case the regexp will be
  #   case-insensitive.
  #
  # If optional keyword argument +timeout+ is given,
  # its float value overrides the timeout interval for the class,
  # Regexp.timeout.
  # If +nil+ is passed as +timeout+, it uses the timeout interval
  # for the class, Regexp.timeout.
  #
  # With argument +regexp+ given, returns a new regexp. The source,
  # options, and timeout are the same as those of +regexp+. The +options+
  # and +n_flag+ arguments are ineffective.  The timeout can be overridden
  # by the +timeout+ keyword.
  #
  #     options = Regexp::MULTILINE
  #     r = Regexp.new('foo', options, timeout: 1.1) # => /foo/m
  #     r2 = Regexp.new(r)                           # => /foo/m
  #     r2.timeout                                   # => 1.1
  #     r3 = Regexp.new(r, timeout: 3.14)            # => /foo/m
  #     r3.timeout                                   # => 3.14
  def initialize(...) end

  # Returns +true+ if +self+ finds a match in +string+:
  #
  #   /^[a-z]*$/ === 'HELLO' # => false
  #   /^[A-Z]*$/ === 'HELLO' # => true
  #
  # This method is called in case statements:
  #
  #   s = 'HELLO'
  #   case s
  #   when /\A[a-z]*\z/; print "Lower case\n"
  #   when /\A[A-Z]*\z/; print "Upper case\n"
  #   else               print "Mixed case\n"
  #   end # => "Upper case"
  def ===(string) end

  # Returns the integer index (in characters) of the first match
  # for +self+ and +string+, or +nil+ if none;
  # also sets the
  # {Regexp global variables}[rdoc-ref:Regexp@Global+Variables]:
  #
  #   /at/ =~ 'input data' # => 7
  #   $~                   # => #<MatchData "at">
  #   /ax/ =~ 'input data' # => nil
  #   $~                   # => nil
  #
  # Assigns named captures to local variables of the same names
  # if and only if +self+:
  #
  # - Is a regexp literal;
  #   see {Regexp Literals}[rdoc-ref:syntax/literals.rdoc@Regexp+Literals].
  # - Does not contain interpolations;
  #   see {Regexp interpolation}[rdoc-ref:Regexp@Interpolation+Mode].
  # - Is at the left of the expression.
  #
  # Example:
  #
  #   /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ '  x = y  '
  #   p lhs # => "x"
  #   p rhs # => "y"
  #
  # Assigns +nil+ if not matched:
  #
  #   /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ '  x = '
  #   p lhs # => nil
  #   p rhs # => nil
  #
  # Does not make local variable assignments if +self+ is not a regexp literal:
  #
  #   r = /(?<foo>\w+)\s*=\s*(?<bar>\w+)/
  #   r =~ '  x = y  '
  #   p foo # Undefined local variable
  #   p bar # Undefined local variable
  #
  # The assignment does not occur if the regexp is not at the left:
  #
  #   '  x = y  ' =~ /(?<foo>\w+)\s*=\s*(?<bar>\w+)/
  #   p foo, bar # Undefined local variables
  #
  # A regexp interpolation, <tt>#{}</tt>, also disables
  # the assignment:
  #
  #   r = /(?<foo>\w+)/
  #   /(?<foo>\w+)\s*=\s*#{r}/ =~ 'x = y'
  #   p foo # Undefined local variable
  def =~(string) end

  # Equivalent to <tt><i>rxp</i> =~ $_</tt>,
  # where <i>rxp</i> is +self+:
  #
  #   $_ = "input data"
  #   ~ /at/ # => 7
  def ~; end

  # Returns +true+ if the case-insensitivity flag in +self+ is set,
  # +false+ otherwise:
  #
  #   /a/.casefold?           # => false
  #   /a/i.casefold?          # => true
  #   /(?i:a)/.casefold?      # => false
  def casefold?; end

  # Returns the Encoding object that represents the encoding of +self+.
  def encoding; end

  # Returns +true+ if +object+ is another \Regexp whose pattern,
  # flags, and encoding are the same as +self+, +false+ otherwise:
  #
  #   /foo/ == Regexp.new('foo')                          # => true
  #   /foo/ == /foo/i                                     # => false
  #   /foo/ == Regexp.new('food')                         # => false
  #   /foo/ == Regexp.new("abc".force_encoding("euc-jp")) # => false
  def eql?(other) end
  alias == eql?

  # Returns +false+ if +self+ is applicable to
  # a string with any ASCII-compatible encoding;
  # otherwise returns +true+:
  #
  #   r = /a/                                         # => /a/
  #   r.fixed_encoding?                               # => false
  #   r.match?("\u{6666} a")                          # => true
  #   r.match?("\xa1\xa2 a".force_encoding("euc-jp")) # => true
  #   r.match?("abc".force_encoding("euc-jp"))        # => true
  #
  #   r = /a/u                                        # => /a/
  #   r.fixed_encoding?                               # => true
  #   r.match?("\u{6666} a")                          # => true
  #   r.match?("\xa1\xa2".force_encoding("euc-jp"))   # Raises exception.
  #   r.match?("abc".force_encoding("euc-jp"))        # => true
  #
  #   r = /\u{6666}/                                  # => /\u{6666}/
  #   r.fixed_encoding?                               # => true
  #   r.encoding                                      # => #<Encoding:UTF-8>
  #   r.match?("\u{6666} a")                          # => true
  #   r.match?("\xa1\xa2".force_encoding("euc-jp"))   # Raises exception.
  #   r.match?("abc".force_encoding("euc-jp"))        # => false
  def fixed_encoding?; end

  # Returns the integer hash value for +self+.
  #
  # Related: Object#hash.
  def hash; end

  # Returns a nicely-formatted string representation of +self+:
  #
  #   /ab+c/ix.inspect # => "/ab+c/ix"
  #
  # Related: Regexp#to_s.
  def inspect; end

  # With no block given, returns the MatchData object
  # that describes the match, if any, or +nil+ if none;
  # the search begins at the given character +offset+ in +string+:
  #
  #   /abra/.match('abracadabra')      # => #<MatchData "abra">
  #   /abra/.match('abracadabra', 4)   # => #<MatchData "abra">
  #   /abra/.match('abracadabra', 8)   # => nil
  #   /abra/.match('abracadabra', 800) # => nil
  #
  #   string = "\u{5d0 5d1 5e8 5d0}cadabra"
  #   /abra/.match(string, 7)          # => #<MatchData "abra">
  #   /abra/.match(string, 8)          # => nil
  #   /abra/.match(string.b, 8)        # => #<MatchData "abra">
  #
  # With a block given, calls the block if and only if a match is found;
  # returns the block's value:
  #
  #   /abra/.match('abracadabra') {|matchdata| p matchdata }
  #   # => #<MatchData "abra">
  #   /abra/.match('abracadabra', 4) {|matchdata| p matchdata }
  #   # => #<MatchData "abra">
  #   /abra/.match('abracadabra', 8) {|matchdata| p matchdata }
  #   # => nil
  #   /abra/.match('abracadabra', 8) {|matchdata| fail 'Cannot happen' }
  #   # => nil
  #
  # Output (from the first two blocks above):
  #
  #   #<MatchData "abra">
  #   #<MatchData "abra">
  #
  # Captures may be read from the returned MatchData:
  #
  #   /(.)(.)(.)/.match("abc")[2] # => "b"
  #   /(.)(.)/.match("abc", 1)[2] # => "c"
  def match(string, offset = 0) end

  # Returns <code>true</code> or <code>false</code> to indicate whether the
  # regexp is matched or not without updating $~ and other related variables.
  # If the second parameter is present, it specifies the position in the string
  # to begin the search.
  #
  #    /R.../.match?("Ruby")    # => true
  #    /R.../.match?("Ruby", 1) # => false
  #    /P.../.match?("Ruby")    # => false
  #    $&                       # => nil
  def match?(...) end

  # Returns a hash representing named captures of +self+
  # (see {Named Captures}[rdoc-ref:Regexp@Named+Captures]):
  #
  # - Each key is the name of a named capture.
  # - Each value is an array of integer indexes for that named capture.
  #
  # Examples:
  #
  #   /(?<foo>.)(?<bar>.)/.named_captures # => {"foo"=>[1], "bar"=>[2]}
  #   /(?<foo>.)(?<foo>.)/.named_captures # => {"foo"=>[1, 2]}
  #   /(.)(.)/.named_captures             # => {}
  def named_captures; end

  # Returns an array of names of captures
  # (see {Named Captures}[rdoc-ref:Regexp@Named+Captures]):
  #
  #   /(?<foo>.)(?<bar>.)(?<baz>.)/.names # => ["foo", "bar", "baz"]
  #   /(?<foo>.)(?<foo>.)/.names          # => ["foo"]
  #   /(.)(.)/.names                      # => []
  def names; end

  # Returns an integer whose bits show the options set in +self+.
  #
  # The option bits are:
  #
  #   Regexp::IGNORECASE # => 1
  #   Regexp::EXTENDED   # => 2
  #   Regexp::MULTILINE  # => 4
  #
  # Examples:
  #
  #   /foo/.options    # => 0
  #   /foo/i.options   # => 1
  #   /foo/x.options   # => 2
  #   /foo/m.options   # => 4
  #   /foo/mix.options # => 7
  #
  # Note that additional bits may be set in the returned integer;
  # these are used internally by the regular expression code,
  # are ignored if passed to Regexp.new, and may be ignored by the caller:
  #
  #   r = /\xa1\xa2/e                 # => /\xa1\xa2/
  #   r.source                        # => "\\xa1\\xa2"
  #   r.options                       # => 16
  #   Regexp.new(r.source, r.options) # => /\xa1\xa2/
  def options; end

  # Returns the original string of +self+:
  #
  #   /ab+c/ix.source # => "ab+c"
  #
  # Regexp escape sequences are retained:
  #
  #   /\x20\+/.source  # => "\\x20\\+"
  #
  # Lexer escape characters are not retained:
  #
  #   /\//.source  # => "/"
  def source; end

  # Returns the timeout interval of +self+ for Regexp matching, in seconds.
  # +nil+ means no per-object timeout is configured.
  #
  # This configuration is per-object. The global configuration set by
  # Regexp.timeout= is ignored if per-object configuration is set.
  #
  #    re = Regexp.new("^a*b?a*$", timeout: 1)
  #    re.timeout               #=> 1.0
  #    re =~ "a" * 100000 + "x" #=> regexp match timeout (Regexp::TimeoutError)
  def timeout; end

  # Returns a string showing the options and string of +self+:
  #
  #   r0 = /ab+c/ix
  #   s0 = r0.to_s # => "(?ix-m:ab+c)"
  #
  # The returned string may be used as an argument to Regexp.new,
  # or as interpolated text for a
  # {Regexp interpolation}[rdoc-ref:Regexp@Interpolation+Mode]:
  #
  #   r1 = Regexp.new(s0) # => /(?ix-m:ab+c)/
  #   r2 = /#{s0}/        # => /(?ix-m:ab+c)/
  #
  # Note that +r1+ and +r2+ are not equal to +r0+
  # because their original strings are different:
  #
  #   r0 == r1  # => false
  #   r0.source # => "ab+c"
  #   r1.source # => "(?ix-m:ab+c)"
  #
  # Related: Regexp#inspect.
  def to_s; end

  # Raised when a regexp match times out;
  # see Regexp.timeout= and Regexp#timeout.
  class TimeoutError < RegexpError
  end
end
