I’m building a tweet database with PHP and MySQL using the Twitter streaming API. Along with the “tweets” table, there are separate tables to keep track of mentions, urls, hashtags and users.
A periodic job keeps only the most recent 1000 tweets for each of my keywords and deletes the rest from the database. I would also like to clean up the data in the associated tables, i.e. the mentions, urls, hashtags and users tables.
I’m wondering what the most efficient way would be to delete all mentions, urls, hashtags and users that no longer have a matching row in the “tweets” table (matched on tweet_id, or on user_id for the users table).
Table structures:
--
-- Table structure for table `tweets`
--
-- One row per stored tweet, keyed by tweet_id. This is the parent table that
-- the tweet_mentions, tweet_tags and tweet_urls rows point at via tweet_id.
CREATE TABLE IF NOT EXISTS `tweets` (
  `tweet_id` bigint(20) unsigned NOT NULL,
  `tweet_text` varchar(200) NOT NULL,
  -- NOTE(review): presumably the raw entities payload from the API; confirm.
  `entities` text NOT NULL,
  `created_at` datetime NOT NULL,
  `geo_lat` decimal(10,5) DEFAULT NULL,
  `geo_long` decimal(10,5) DEFAULT NULL,
  -- Declared with the same type as users.user_id (bigint unsigned): an
  -- int unsigned column cannot hold ids above 4294967295, and matching types
  -- keep joins between tweets and users comparing like with like.
  `user_id` bigint(20) unsigned NOT NULL,
  -- Author details copied onto the tweet row (also present in users).
  `screen_name` char(20) NOT NULL,
  `name` varchar(40) DEFAULT NULL,
  `profile_image_url` varchar(200) DEFAULT NULL,
  -- NOTE(review): looks like the tracked keyword(s) this tweet matched; confirm.
  `tweet_keywords` varchar(128) NOT NULL,
  PRIMARY KEY (`tweet_id`),
  KEY `created_at` (`created_at`),
  KEY `user_id` (`user_id`),
  KEY `screen_name` (`screen_name`),
  KEY `name` (`name`),
  FULLTEXT KEY `tweet_text` (`tweet_text`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
-- --------------------------------------------------------
--
-- Table structure for table `tweet_mentions`
--
-- One row per mention: the tweet it occurred in, plus a source and a target
-- user id. MyISAM does not enforce foreign keys, so rows here are not removed
-- automatically when the parent tweet is deleted.
CREATE TABLE IF NOT EXISTS `tweet_mentions` (
  -- Unsigned to match tweets.tweet_id exactly (same type and signedness).
  `tweet_id` bigint(20) unsigned NOT NULL,
  -- Unsigned to match users.user_id.
  `source_user_id` bigint(20) unsigned NOT NULL,
  `target_user_id` bigint(20) unsigned NOT NULL,
  KEY `tweet_id` (`tweet_id`),
  KEY `source` (`source_user_id`),
  KEY `target` (`target_user_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
-- --------------------------------------------------------
--
-- Table structure for table `tweet_tags`
--
-- One row per (tweet, tag) pair. MyISAM does not enforce foreign keys, so
-- rows here are not removed automatically when the parent tweet is deleted.
CREATE TABLE IF NOT EXISTS `tweet_tags` (
  -- Unsigned to match tweets.tweet_id exactly (same type and signedness).
  `tweet_id` bigint(20) unsigned NOT NULL,
  `tag` varchar(100) NOT NULL,
  KEY `tweet_id` (`tweet_id`),
  KEY `tag` (`tag`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
-- --------------------------------------------------------
--
-- Table structure for table `tweet_urls`
--
-- One row per (tweet, url) pair. MyISAM does not enforce foreign keys, so
-- rows here are not removed automatically when the parent tweet is deleted.
CREATE TABLE IF NOT EXISTS `tweet_urls` (
  -- Unsigned to match tweets.tweet_id exactly (same type and signedness).
  `tweet_id` bigint(20) unsigned NOT NULL,
  `url` varchar(140) NOT NULL,
  KEY `tweet_id` (`tweet_id`),
  KEY `url` (`url`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
-- --------------------------------------------------------
--
-- Table structure for table `users`
--
-- One row per user, keyed by user_id.
CREATE TABLE IF NOT EXISTS `users` (
    -- Identity
    `user_id`           bigint(20) unsigned NOT NULL,
    `screen_name`       varchar(20) NOT NULL,
    `name`              varchar(40) DEFAULT NULL,

    -- Profile details (all optional)
    `profile_image_url` varchar(200) DEFAULT NULL,
    `location`          varchar(30) DEFAULT NULL,
    `url`               varchar(200) DEFAULT NULL,
    `description`       varchar(200) DEFAULT NULL,
    `created_at`        datetime NOT NULL,

    -- Counters (all optional)
    `followers_count`   int(10) unsigned DEFAULT NULL,
    `friends_count`     int(10) unsigned DEFAULT NULL,
    `statuses_count`    int(10) unsigned DEFAULT NULL,

    `time_zone`         varchar(40) DEFAULT NULL,

    -- Set on insert and refreshed by MySQL whenever the row is updated.
    `last_update`       timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,

    PRIMARY KEY (`user_id`),
    INDEX `user_name` (`name`),
    INDEX `last_update` (`last_update`),
    INDEX `screen_name` (`screen_name`),
    FULLTEXT INDEX `description` (`description`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
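Have a look at foreign key constraints: with `ON DELETE CASCADE` on each child table's `tweet_id`, deleting a tweet removes its mentions, urls and hashtags automatically. Note that MySQL only enforces foreign keys on InnoDB tables, and the child column must have the same type and signedness as `tweets.tweet_id`, so the MyISAM tables above would have to be converted first. Without foreign keys, a multi-table delete removes the orphans, e.g. `DELETE m FROM tweet_mentions AS m LEFT JOIN tweets AS t ON t.tweet_id = m.tweet_id WHERE t.tweet_id IS NULL;` (repeat for `tweet_tags` and `tweet_urls`, and match `users` on `user_id`).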