-
-
Save kvignos/b41a61564aea2cbcaf69 to your computer and use it in GitHub Desktop.
Data Integrity Checks for WordPress Migrations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/php | |
<?php | |
$time_start = microtime( true ); | |
//mysql setup | |
$host = 'localhost'; | |
$user = 'wordpress'; | |
$pass = 'wordpress'; | |
$db = 'wordpress'; | |
$mysqli = new mysqli( $host, $user, $pass, $db ); | |
$warnings = 0; | |
$successes = 0; | |
/*****************************************/ | |
/* VALIDATION */ | |
/*****************************************/ | |
/* no homeless, uncategorized posts */ | |
$sql = "SELECT * FROM wp_term_relationships WHERE term_taxonomy_id IN (SELECT term_taxonomy_id FROM wp_term_taxonomy WHERE taxonomy='category' AND term_id IN (SELECT term_id FROM wp_terms WHERE slug='uncategorized'))"; | |
$res = $mysqli->query( $sql ); | |
if ( $res->num_rows > 0 ) { | |
while( $row = $res->fetch_assoc( ) ) { | |
$object_id = $row['object_id']; | |
printf( "WARNING: Post " . $object_id . " has an invalid category.\n" ); | |
$warnings++; | |
} | |
} else { | |
printf( "SUCCESS: No uncategorized posts found." . "\n" ); | |
$successes++; | |
} | |
/* no phantom authors - posts should not have post_authors that don't exist */ | |
$sql = "SELECT wp_posts.ID FROM wp_posts LEFT JOIN wp_users ON wp_posts.post_author = wp_users.ID WHERE wp_users.ID IS NULL GROUP BY wp_posts.post_author"; | |
$res = $mysqli->query( $sql ); | |
if ( $res->num_rows > 0 ) { | |
while( $row = $res->fetch_assoc( ) ) { | |
$post_id = $row['ID']; | |
printf( "WARNING: Post " . $post_id . " has an invalid author.\n" ); | |
$warnings++; | |
} | |
} else { | |
printf( "SUCCESS: No invalid authors found." . "\n" ); | |
$successes++; | |
} | |
/*****************************************/ | |
/* DUPLICATES */ | |
/*****************************************/ | |
/* no redirect posts aka cross posts */ | |
$sql = "SELECT wp_posts.ID FROM wp_posts WHERE ID IN (SELECT post_id FROM wp_postmeta WHERE meta_key = '_yoast_wpseo_redirect' AND meta_value LIKE '%20%') AND post_type = 'post'"; | |
$res = $mysqli->query( $sql ); | |
if ( $res->num_rows > 0 ) { | |
while( $row = $res->fetch_assoc( ) ) { | |
$post_id = $row['ID']; | |
printf( "WARNING: Post " . $post_id . " is a redirect aka cross post. \n" ); | |
$warnings++; | |
} | |
} else { | |
printf( "SUCCESS: No redirect aka cross posts found." . "\n" ); | |
$successes++; | |
} | |
/* no duplicate posts - no posts with two rows in publish status with matching post date and post title */ | |
$sql = "SELECT post_date, post_title, post_name, post_content, COUNT(*) as count FROM wp_posts WHERE post_status = 'publish' and post_type = 'post' and post_title != '' GROUP BY post_title, post_date, post_content HAVING COUNT(*) > 1"; | |
$res = $mysqli->query( $sql ); | |
if ( $res->num_rows > 0 ) { | |
while( $row = $res->fetch_assoc( ) ) { | |
$post_date = $row['post_date']; | |
$post_title = $row['post_title']; | |
$post_name = $row['post_name']; | |
$count = $row['count']; | |
printf( "WARNING: Duplicate post by date, title, and content: " . $post_date . " | " . $post_title . " | " . $post_name . " | " . $count . "\n" ); | |
$warnings++; | |
} | |
} else { | |
printf( "SUCCESS: No duplicate posts found by date, title, and content." . "\n" ); | |
$successes++; | |
} | |
/* no duplicate guids */ | |
$sql = "SELECT guid, post_date, post_name, COUNT(*) as count FROM wp_posts WHERE post_status = 'publish' GROUP BY guid, post_date, post_name HAVING COUNT(*) > 1"; | |
$res = $mysqli->query( $sql ); | |
if ( $res->num_rows > 0 ) { | |
while( $row = $res->fetch_assoc( ) ) { | |
$guid = $row['guid']; | |
$count = $row['count']; | |
printf( "WARNING: Duplicate post by guid: " . $guid . " | " . $count . "\n" ); | |
$warnings++; | |
} | |
} else { | |
printf( "SUCCESS: No duplicate posts found by guid." . "\n" ); | |
$successes++; | |
} | |
/*****************************************/ | |
/* ORPHANS */ | |
/*****************************************/ | |
/* attachment mapping - no orphan attachments */ | |
$sql = "SELECT wp_posts.ID FROM wp_posts WHERE post_type = 'attachment' AND post_parent != 0 AND post_parent NOT IN (SELECT ID from wp_posts)"; | |
$res = $mysqli->query( $sql ); | |
if ( $res->num_rows > 0 ) { | |
while( $row = $res->fetch_assoc( ) ) { | |
$post_id = $row['ID']; | |
printf( "WARNING: Attachment post " . $post_id . " has a post parent that does not exist. \n" ); | |
$warnings++; | |
} | |
} else { | |
printf( "SUCCESS: No orphan attachments found." . "\n" ); | |
$successes++; | |
} | |
/* post parent must be valid for non attachments too */ | |
$sql = "SELECT ID FROM wp_posts WHERE post_parent != '' AND post_parent != 0 AND post_type != 'attachment' AND post_parent NOT IN (SELECT ID from wp_posts)"; | |
$res = $mysqli->query( $sql ); | |
if ( $res->num_rows > 0 ) { | |
while( $row = $res->fetch_assoc( ) ) { | |
$post_id = $row['ID']; | |
printf( "WARNING: Post " . $post_id . " is a non attachment that has post parent that does not exist.\n" ); | |
$warnings++; | |
} | |
} else { | |
printf( "SUCCESS: No invalid post parents found for non attachments." . "\n" ); | |
$successes++; | |
} | |
/* check for postmeta orphans */ | |
$sql = "SELECT post_id FROM wp_postmeta WHERE post_id NOT IN (SELECT DISTINCT ID FROM wp_posts)"; | |
$res = $mysqli->query( $sql ); | |
if ( $res->num_rows > 0 ) { | |
while( $row = $res->fetch_assoc( ) ) { | |
$post_id = $row['post_id']; | |
printf( "WARNING: Orphan postmeta for post: " . $post_id . "\n" ); | |
$warnings++; | |
} | |
} else { | |
printf( "SUCCESS: No orphan postmeta found." . "\n" ); | |
$successes++; | |
} | |
/* check for wp_term_relationships orphans */ | |
$sql = "SELECT object_id FROM wp_term_relationships WHERE object_id NOT IN (SELECT ID FROM wp_posts)"; | |
$res = $mysqli->query( $sql ); | |
if ( $res->num_rows > 0 ) { | |
while( $row = $res->fetch_assoc( ) ) { | |
$object_id = $row['object_id']; | |
printf( "WARNING: Orphan term relationships for object that does not exist: " . $object_id . "\n" ); | |
$warnings++; | |
} | |
} else { | |
printf( "SUCCESS: No orphan term relationships found." . "\n" ); | |
$successes++; | |
} | |
/* check for usermeta orphans */ | |
$sql = "SELECT user_id FROM wp_usermeta WHERE user_id NOT IN (SELECT ID from wp_users)"; | |
$res = $mysqli->query( $sql ); | |
if ( $res->num_rows > 0 ) { | |
while( $row = $res->fetch_assoc( ) ) { | |
$user_id = $row['user_id']; | |
printf( "WARNING: Orphan usermeta for user: " . $user_id . "\n" ); | |
$warnings++; | |
} | |
} else { | |
printf( "SUCCESS: No orphan usermeta found." . "\n" ); | |
$successes++; | |
} | |
/* check that gallery ids are valid */ | |
$gallery_warnings = 0; | |
$sql = "SELECT ID FROM wp_posts WHERE post_content LIKE '%[gallery ids=%' AND post_status = 'publish'"; | |
$res = $mysqli->query( $sql ); | |
while( $row = $res->fetch_assoc( ) ) { | |
$post_id = $row['ID']; | |
// get the string of ids from out the gallery shortcode | |
// [gallery ids=" is 14 characters | |
// ]" is 2 characters | |
$sql2 = "SELECT SUBSTRING(post_content, LOCATE('[gallery ids=', post_content)+14, LOCATE(']', SUBSTRING(post_content, LOCATE('[gallery ids=', post_content)))-16) AS ids FROM wp_posts WHERE ID = " . $post_id; | |
$res2 = $mysqli->query( $sql2 ); | |
while( $row2 = $res2->fetch_assoc( ) ) { | |
$ids_string = $row2['ids']; | |
if ( trim( $ids_string ) != '' ) { | |
$ids = explode( ",", $ids_string ); | |
foreach ( $ids as $id ) { | |
// check wp_posts for each id in the gallery shortcode ids param | |
$sql3 = "SELECT COUNT(*) AS count FROM wp_posts WHERE ID = " . $id . " AND post_type = 'attachment'"; | |
$res3 = $mysqli->query( $sql3 ); | |
while( $row3 = $res3->fetch_assoc( ) ) { | |
$count = $row3['count']; | |
if ( $count == 0 ) { | |
printf( "WARNING: The gallery on post " . $post_id . " has a post attachment: " . $id . " that does not exist.\n" ); | |
$warnings++; | |
$gallery_warnings++; | |
} | |
} | |
} | |
} | |
} | |
} | |
if ( $gallery_warnings == 0 ) { | |
printf( "SUCCESS: All gallery ids checked and valid attachments found." . "\n" ); | |
$successes++; | |
} | |
printf( "Data Integrity Checks Complete.\n" . $warnings . " warnings \n" . $successes . " successes \n" ); | |
/* close connection */ | |
$mysqli->close(); | |
$time_end = microtime( true ); | |
//dividing with 60 will give the execution time in minutes otherwise seconds | |
$execution_time = ( $time_end - $time_start )/60; | |
//execution time of the script | |
printf("Total Execution Time: " . $execution_time . " mins \n"); | |
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment