mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 03:52:35 +01:00
a3099a3b4a
Also increase user name up to 128. The work was started by Rucha Deodhar <rucha.deodhar@mariadb.com>, contains audit plugin fixes by Alexey Botchkov <holyfoot@askmonty.org>.
212 lines
8.4 KiB
Text
212 lines
8.4 KiB
Text
/* Copyright (c) 2019, MariaDB Corporation.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
|
|
|
|
#ifndef NO_EMBEDDED_ACCESS_CHECKS
|
|
|
|
#define magic_bits 30
|
|
/*
|
|
Returns a number which, if sorted in descending order, magically puts
|
|
patterns in the order from most specific (e.g. no wildcards) to most generic
|
|
(e.g. "%"). That is, the larger the number, the more specific the pattern is.
|
|
|
|
Takes a template that lists types of following patterns (by the first letter
|
|
of _h_ostname, _d_bname, _u_sername) and up to four patterns.
|
|
No more than two can be of 'h' or 'd' type (because one magic value takes
|
|
magic_bits bits, see below).
|
|
|
|
========================================================================
|
|
|
|
Here's how the magic is created:
|
|
|
|
Let's look at one iteration of the for() loop. That's one pattern. With
|
|
wildcards (usernames aren't interesting).
|
|
|
|
By definition a pattern A is "more specific" than pattern B if the set of
|
|
strings that match the pattern A is smaller than the set of strings that
|
|
match the pattern B. Strings are taken from the big superset of all valid
|
|
utf8 strings up to the maxlen.
|
|
|
|
Strings are matched character by character. For every non-wildcard
|
|
character there can be only one matching character in the matched string.
|
|
|
|
For a wild_one character ('_') any valid utf8 character will do. Below
|
|
numchars would mean a total number of vaid utf8 characters. It's a huge
|
|
number. A number of matching strings for wild_one will be numchars.
|
|
|
|
For a wild_many character ('%') any number of valid utf8 characters will do.
|
|
How many string will match it depends on the amount of non-wild_many
|
|
characters. Say, if a number of non-wildcard characters is N, and a number
|
|
of wild_one characters is M, and the number of wild_many characters is K,
|
|
then for K=1 its wild_many character will match any number of valid utf8
|
|
characters from 0 to L=maxlen-N-M. The number of matching strings will be
|
|
|
|
1 + numchars + numchars^2 + numchars^3 + ... + numchars^L
|
|
|
|
Intermediate result: if M=K=0, the pattern will match only one string,
|
|
if M>0, K=0, the pattern will match numchars^M strings, if K=1, the
|
|
pattern will match
|
|
|
|
numchars^M + 1 + numchars + numchars^2 + ... + numchars^L
|
|
|
|
For a more visual notation, let's write these huge numbers not as
|
|
decimal or binary, but base numchars. Then the last number will be
|
|
a sum of two numbers: the first is one followed by M zeros, the second
|
|
constists of L+1 ones:
|
|
|
|
1000{...M...}000 + 111{...L+1...}1111
|
|
|
|
This could produce any of the following
|
|
|
|
111...112111...1111 if L > M, K = 1
|
|
100...001111...1111 if M > L, K = 1
|
|
2111111...111111111 if M = L, K = 1
|
|
1111111...111111111 if M = 0, K = 1
|
|
1000000...000000000 if K = 0, M > 0
|
|
|
|
There are two complications caused by multiple wild_many characters.
|
|
For, say, two wild_many characters, either can accept any number of utf8
|
|
characters, as long the the total amount of them is less then or equal to L.
|
|
Same logic applies to any number of non-consequent wild_many characters
|
|
(consequent wild_many characters count as one). This gives the number of
|
|
matching strings of
|
|
|
|
1 + F(K,1)*numchars + F(K,2)*numchars^2 + ... + F(K,L)*numchars^L
|
|
|
|
where F(K,R) is the "number of ways one can put R balls into K boxes",
|
|
that is C^{K-1}_{R+K-1}.
|
|
|
|
In the "base numchars" notation, it means that besides 0, 1, and 2,
|
|
an R-th digit can be F(K,R). For the purpose of comparison, we only need
|
|
to know the most significant digit, F(K, L).
|
|
While it can be huge, we don't need the exact value, it's a
|
|
a monotonously increasing function of K, so if K1>K2, F(K1,L) > F(K2,L)
|
|
and we can simply compare values of K instead of complex F(K,L).
|
|
|
|
The second complication: F(K,R) gives only an upper boundary, the
|
|
actual number of matched strings can be smaller.
|
|
Example: pattern "a%b%c" can match "abbc" as a(b)b()c, and as a()b(b)c.
|
|
F(2,1) = 2, but it's only one string "abbc".
|
|
We'll ignore it here under assumption that it almost never happens
|
|
in practice and this simplification won't noticeably disrupt the ordering.
|
|
|
|
The last detail: old get_sort function sorted by the non-wildcard prefix
|
|
length, so in "abc_" and "a_bc" the former one was sorted first. Strictly
|
|
speaking they're both equally specific, but to preserve the backward
|
|
compatible sorting we'll use the P "prefix length or 0 if no wildcards"
|
|
to break ties.
|
|
|
|
Now, let's compare two long numbers. Numbers are easy to compare,
|
|
the longer number is larger. If they both have the same lengths,
|
|
the one with the larger first digit is larger, and so on.
|
|
|
|
But there is no need to actually calculate these numbers.
|
|
Three numbers L, K, M (and P to break ties) are enough to describe a pattern
|
|
for a purpose of comparison. L/K/M triplets can be compared like this:
|
|
|
|
* case 1: if for both patterns L>M: compare L, K, M, in that order
|
|
because:
|
|
- if L1 > L2, the first number is longer
|
|
- If L1 == L2, then the first digit is a monotonously increasing function
|
|
of K, so the first digit is larger when K is larger
|
|
- if K1 == K2, then all other digits in these numbers would be the
|
|
same too, with the exception of one digit in the middle that
|
|
got +1 because of +1000{...M...}000. So, whatever number has a
|
|
larger M will get this +1 first.
|
|
* case 2: if for both patterns L<M: compare M, L, K, in that order
|
|
* case 3: if for both patterns L=M: compare L (or M), K
|
|
* case 4: if one L1>M1, other L2=M2: compare L, K, M
|
|
* case 5: if one L1<M1, other L2=M2: compare M, L, K
|
|
* case 6: if one pattern L1>M1, the other M2>L2: first is more generic
|
|
unless (case 6a) K1=K2=1,M1=0,M2=L2+1 (in that case - equal)
|
|
|
|
note that in case 3 one can use a rule from the case either 1 or 2,
|
|
in the case 4 one can use the rule from the case 1,
|
|
in the case 5 one can use the rule from the case 2.
|
|
|
|
for the case 6 and ignoring the special case 6a, to compare patterns by a
|
|
magic number as a function z(a,b,c), we must ensure that z(L1,K1,M1) is
|
|
greater than z(M2,L2,K2) when L1=M2. This can be done by an extra bit,
|
|
which is 1 for K and 0 for L. Thus, the magic number could be
|
|
|
|
case 1: (((L*2 + 1)*(maxlen+1) + K)*(maxlen+1) + M)*(maxlen+1) + P
|
|
case 2: ((M*2*(maxlen+1) + L)*(maxlen+1) + K)*(maxlen+1) + P
|
|
|
|
upper bound: L<=maxlen, M<=maxlen, K<=maxlen/2, P<maxlen
|
|
for a current maxlen=64, the magic number needs magic_bits bits.
|
|
*/
|
|
|
|
static ulonglong get_magic_sort(const char *templ, ...)
|
|
{
|
|
ulonglong sort=0;
|
|
va_list args;
|
|
va_start(args, templ);
|
|
|
|
IF_DBUG(uint bits_used= 0,);
|
|
|
|
for (; *templ; templ++)
|
|
{
|
|
char *pat= va_arg(args, char*);
|
|
|
|
if (*templ == 'u')
|
|
{
|
|
/* Username. Can be empty (= anybody) or a literal. Encoded in one bit */
|
|
sort= (sort << 1) + !*pat;
|
|
IF_DBUG(bits_used++,);
|
|
continue;
|
|
}
|
|
|
|
/* A wildcard pattern. Encoded in magic_bits bits. */
|
|
uint maxlen= *templ == 'd' ? max_dbname_length : max_hostname_length;
|
|
DBUG_ASSERT(maxlen <= 255);
|
|
DBUG_ASSERT(*templ == 'd' || *templ == 'h');
|
|
|
|
uint N= 0, M= 0, K= 0, P= 0;
|
|
for (uint i=0; pat[i]; i++)
|
|
{
|
|
if (pat[i] == wild_many)
|
|
{
|
|
if (!K && !M) P= N;
|
|
K++;
|
|
while (pat[i+1] == wild_many) i++;
|
|
continue;
|
|
}
|
|
if (pat[i] == wild_one)
|
|
{
|
|
if (!K && !M) P= N;
|
|
M++;
|
|
continue;
|
|
}
|
|
if (pat[i] == wild_prefix && pat[i+1]) i++;
|
|
N++;
|
|
}
|
|
|
|
set_if_smaller(K, 31);
|
|
set_if_smaller(M, 31);
|
|
|
|
ulonglong L= K ? maxlen - N - M : 0, d= maxlen + 1, magic;
|
|
ulonglong d1= MY_MIN(d, 32);
|
|
if (L > M)
|
|
magic= (((L * 2 + 1) * d + K) * d1 + M) * d + P;
|
|
else
|
|
magic= (((M * 2 + 0) * d + L) * d1 + K) * d + P;
|
|
DBUG_ASSERT(magic < (1ULL << magic_bits));
|
|
sort= (sort << magic_bits) + magic;
|
|
IF_DBUG(bits_used+= magic_bits,);
|
|
}
|
|
DBUG_ASSERT(bits_used < 8*sizeof(sort));
|
|
va_end(args);
|
|
return ~sort;
|
|
}
|
|
#endif
|