#!/usr/bin/perl -w # # not_in_wrong_results.pl # # Illustrate a problem with NOT IN in MySQL 5.0.20a and later # # You may want to run this a few times to see how the server will react # differently, given different randomly generated sets. use strict; use Getopt::Std; use Term::ReadKey; use DBI; my $MAX_VALUE = 1100; my ($dbhost, $dbname, $user, $pass, $dbh, $qstr, $qry, @lst, $uniq); $dbhost = "localhost"; $dbname = "test"; $user = getpwuid(%<); &get_options; $pass = &get_password; $dbh = DBI->connect("DBI:mysql:$dbname:$dbhost", $user, $pass) or die "Couldn't connect to DB: " . $DBI::errstr; # create the table $dbh->do("DROP TABLE IF EXISTS not_in"); $dbh->do("CREATE TABLE not_in (value int(11) NOT NULL)"); $dbh->do("ALTER TABLE not_in ADD INDEX (value)"); # fill it up for (my $i = 0; $i < $MAX_VALUE; $i++) { $dbh->do("INSERT INTO not_in (value) VALUES ($i)"); } ########################################################################### # this will fail; it has 1000 random but valid values in the NOT IN list # it doesn't seem to matter if elements are repeated # this returns too few results @lst = &random_list(1000); $uniq = &num_uniq(@lst); $qstr = "SELECT value FROM not_in WHERE value NOT IN (" . join(",", @lst) . ")"; $qry = $dbh->prepare($qstr); $qry->execute; print "Got " . $qry->rows . " from a random set with 1000 elements" . " (expecting " . ($MAX_VALUE - $uniq) . ")\n"; $qry->finish; ########################################################################### # This fails # Note these are the same numbers, just sorted. @lst = sort(@lst); $qstr = "SELECT value FROM not_in WHERE value NOT IN (" . join(",", @lst) . ")"; $qry = $dbh->prepare($qstr); $qry->execute; print "Got " . $qry->rows . " from an ordered, non-contiguous set" . " with 1000 elements (expecting " . ($MAX_VALUE - $uniq) . ")\n"; $qry->finish; ############################################################################# # this will succeed; though it has 1000 items, they are ordered and sequential $qstr = "SELECT value FROM not_in WHERE value NOT IN (" . join(", ", 1..1000) . ")"; $qry = $dbh->prepare($qstr); $qry->execute; print "Got " . $qry->rows . " from an ordered, contiguous set" . " of 1000 elements (expecting " . ($MAX_VALUE - 1000) . ")\n"; $qry->finish; ######################################################################### # This succeeds. # The elements are random and non-sequential, but there are only 999 of them. @lst = &random_list(999); $uniq = &num_uniq(@lst); $qstr = "SELECT value FROM not_in WHERE value NOT IN (" . join(",", @lst) . ")"; $qry = $dbh->prepare($qstr); $qry->execute; print "Got " . $qry->rows . " from an ordered, non-contiguous set" . " with 999 elements (expecting " . ($MAX_VALUE - $uniq) . ")\n"; $qry->finish; $dbh->do("DROP TABLE not_in"); $dbh->disconnect; ####################################################################### ############ End of tests ############################################# ####################################################################### # # Get a list of random integers on [0, $MAX_VALUE) # sub random_list($) { my ($num) = @_; my (@ret); for (my $i = 0; $i < $num; $i++) { push(@ret, int(rand() * $MAX_VALUE)); } return @ret } # random_list # # Figure out how many unique values are in a list # sub num_uniq(@) { my (@lst) = @_; my (%seen, $item, @uniq); foreach $item (@lst) { push(@uniq, $item) unless $seen{$item}++; } return int(@uniq); } # num_uniq # # Get specified command line options # sub get_options { my (%opts); getopts("s:d:u:h", \%opts); if ($opts{s}) { $dbhost = $opts{s}; } if ($opts{d}) { $dbname = $opts{d}; } if ($opts{u}) { $user = $opts{u}; } if ($opts{h}) { print STDERR &usage . "\n"; die; } } # get_options # # Prompt the user for a password # sub get_password { my ($pass); ReadMode 'noecho'; print STDERR "Password: "; $pass = ReadLine 0; chomp($pass); print STDERR "\n"; ReadMode 'normal'; return $pass; } # get_password # # Get a usage message # sub usage { "Usage: not_in_wron_results.pl [ -s DB_SERVER ] [ -d DB_NAME ] [ -u USER ]" } # usage