Powered By Blogger

Thursday, 7 June 2012

query to find alll the the duplicate records

## the table syntax is as follows:
mysql> show create table house \G
*************************** 1. row ***************************
       Table: house
Create Table: CREATE TABLE `house` (
  `houseno` int(11) DEFAULT NULL,
  `name` varchar(10) DEFAULT NULL,
  `tin_id` int(11) NOT NULL AUTO_INCREMENT,
  PRIMARY KEY (`tin_id`),
  KEY `idx_name` (`name`),
  KEY `idx_name1` (`name`,`houseno`),
  KEY `idx_house2` (`houseno`,`name`)
) ENGINE=InnoDB AUTO_INCREMENT=12 DEFAULT CHARSET=latin1
1 row in set (0.00 sec)
mysql> select * from house;
+---------+------+--------+
| houseno | name | tin_id |
+---------+------+--------+
|       1 | a    |      2 |
|       1 | a    |      4 |
|       2 | a    |      6 |
|       3 | b    |      8 |
|       3 | c    |     22 |
|       3 | c    |     24 |
|       4 | c    |     10 |
|       4 | c    |     12 |
|       4 | c    |     14 |
|       4 | c    |     16 |
|       4 | c    |     18 |
|       4 | c    |     20 |
+---------+------+--------+
12 rows in set (0.00 sec)
mysql> select max(tin_id),min(tin_id),houseno,name from house group by houseno,name having count(*)>
1;
+-------------+-------------+---------+------+
| max(tin_id) | min(tin_id) | houseno | name |
+-------------+-------------+---------+------+
|           4 |           2 |       1 | a    |
|          24 |          22 |       3 | c    |
|          20 |          10 |       4 | c    |
+-------------+-------------+---------+------+
3 rows in set (0.00 sec)

## if we want only tin_id the query is as follows:
SELECT h1.tin_id FROM house h1 WHERE EXISTS (SELECT NULL FROM house h2 WHERE h1.houseno = h2.houseno AND h1.name = h2.name AND h2.tin_id <> h1.tin_id) ;
+--------+
| tin_id |
+--------+
|      2 |
|      4 |
|     22 |
|     24 |
|     10 |
|     12 |
|     14 |
|     16 |
|     18 |
|     20 |
+--------+
10 rows in set (0.00 sec)

## if we want to see the set of all duplicate records the query is as follows:
SELECT h1.* FROM house h1 WHERE EXISTS (SELECT NULL FROM house h2 WHERE h1.houseno = h2.houseno AND h1.name = h2.name AND h2.tin_id <> h1.tin_id);
+---------+------+--------+
| houseno | name | tin_id |
+---------+------+--------+
|       1 | a    |      2 |
|       1 | a    |      4 |
|       3 | c    |     22 |
|       3 | c    |     24 |
|       4 | c    |     10 |
|       4 | c    |     12 |
|       4 | c    |     14 |
|       4 | c    |     16 |
|       4 | c    |     18 |
|       4 | c    |     20 |
+---------+------+--------+
10 rows in set (0.00 sec)
## The other best solution is as follows:

SELECT tin_id
 FROM (SELECT houseno, name
         FROM house
       GROUP BY houseno, name
       HAVING count(*) > 1) temp,
      house h
WHERE temp.houseno = h.houseno AND temp.name = h.name;