Skip to content

Instantly share code, notes, and snippets.

@kvignos
Created October 13, 2014 07:15
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kvignos/b41a61564aea2cbcaf69 to your computer and use it in GitHub Desktop.
Save kvignos/b41a61564aea2cbcaf69 to your computer and use it in GitHub Desktop.
Data Integrity Checks for WordPress Migrations
#!/usr/bin/php
<?php
$time_start = microtime( true );
//mysql setup
$host = 'localhost';
$user = 'wordpress';
$pass = 'wordpress';
$db = 'wordpress';
$mysqli = new mysqli( $host, $user, $pass, $db );
$warnings = 0;
$successes = 0;
/*****************************************/
/* VALIDATION */
/*****************************************/
/* no homeless, uncategorized posts */
$sql = "SELECT * FROM wp_term_relationships WHERE term_taxonomy_id IN (SELECT term_taxonomy_id FROM wp_term_taxonomy WHERE taxonomy='category' AND term_id IN (SELECT term_id FROM wp_terms WHERE slug='uncategorized'))";
$res = $mysqli->query( $sql );
if ( $res->num_rows > 0 ) {
while( $row = $res->fetch_assoc( ) ) {
$object_id = $row['object_id'];
printf( "WARNING: Post " . $object_id . " has an invalid category.\n" );
$warnings++;
}
} else {
printf( "SUCCESS: No uncategorized posts found." . "\n" );
$successes++;
}
/* no phantom authors - posts should not have post_authors that don't exist */
$sql = "SELECT wp_posts.ID FROM wp_posts LEFT JOIN wp_users ON wp_posts.post_author = wp_users.ID WHERE wp_users.ID IS NULL GROUP BY wp_posts.post_author";
$res = $mysqli->query( $sql );
if ( $res->num_rows > 0 ) {
while( $row = $res->fetch_assoc( ) ) {
$post_id = $row['ID'];
printf( "WARNING: Post " . $post_id . " has an invalid author.\n" );
$warnings++;
}
} else {
printf( "SUCCESS: No invalid authors found." . "\n" );
$successes++;
}
/*****************************************/
/* DUPLICATES */
/*****************************************/
/* no redirect posts aka cross posts */
$sql = "SELECT wp_posts.ID FROM wp_posts WHERE ID IN (SELECT post_id FROM wp_postmeta WHERE meta_key = '_yoast_wpseo_redirect' AND meta_value LIKE '%20%') AND post_type = 'post'";
$res = $mysqli->query( $sql );
if ( $res->num_rows > 0 ) {
while( $row = $res->fetch_assoc( ) ) {
$post_id = $row['ID'];
printf( "WARNING: Post " . $post_id . " is a redirect aka cross post. \n" );
$warnings++;
}
} else {
printf( "SUCCESS: No redirect aka cross posts found." . "\n" );
$successes++;
}
/* no duplicate posts - no posts with two rows in publish status with matching post date and post title */
$sql = "SELECT post_date, post_title, post_name, post_content, COUNT(*) as count FROM wp_posts WHERE post_status = 'publish' and post_type = 'post' and post_title != '' GROUP BY post_title, post_date, post_content HAVING COUNT(*) > 1";
$res = $mysqli->query( $sql );
if ( $res->num_rows > 0 ) {
while( $row = $res->fetch_assoc( ) ) {
$post_date = $row['post_date'];
$post_title = $row['post_title'];
$post_name = $row['post_name'];
$count = $row['count'];
printf( "WARNING: Duplicate post by date, title, and content: " . $post_date . " | " . $post_title . " | " . $post_name . " | " . $count . "\n" );
$warnings++;
}
} else {
printf( "SUCCESS: No duplicate posts found by date, title, and content." . "\n" );
$successes++;
}
/* no duplicate guids */
$sql = "SELECT guid, post_date, post_name, COUNT(*) as count FROM wp_posts WHERE post_status = 'publish' GROUP BY guid, post_date, post_name HAVING COUNT(*) > 1";
$res = $mysqli->query( $sql );
if ( $res->num_rows > 0 ) {
while( $row = $res->fetch_assoc( ) ) {
$guid = $row['guid'];
$count = $row['count'];
printf( "WARNING: Duplicate post by guid: " . $guid . " | " . $count . "\n" );
$warnings++;
}
} else {
printf( "SUCCESS: No duplicate posts found by guid." . "\n" );
$successes++;
}
/*****************************************/
/* ORPHANS */
/*****************************************/
/* attachment mapping - no orphan attachments */
$sql = "SELECT wp_posts.ID FROM wp_posts WHERE post_type = 'attachment' AND post_parent != 0 AND post_parent NOT IN (SELECT ID from wp_posts)";
$res = $mysqli->query( $sql );
if ( $res->num_rows > 0 ) {
while( $row = $res->fetch_assoc( ) ) {
$post_id = $row['ID'];
printf( "WARNING: Attachment post " . $post_id . " has a post parent that does not exist. \n" );
$warnings++;
}
} else {
printf( "SUCCESS: No orphan attachments found." . "\n" );
$successes++;
}
/* post parent must be valid for non attachments too */
$sql = "SELECT ID FROM wp_posts WHERE post_parent != '' AND post_parent != 0 AND post_type != 'attachment' AND post_parent NOT IN (SELECT ID from wp_posts)";
$res = $mysqli->query( $sql );
if ( $res->num_rows > 0 ) {
while( $row = $res->fetch_assoc( ) ) {
$post_id = $row['ID'];
printf( "WARNING: Post " . $post_id . " is a non attachment that has post parent that does not exist.\n" );
$warnings++;
}
} else {
printf( "SUCCESS: No invalid post parents found for non attachments." . "\n" );
$successes++;
}
/* check for postmeta orphans */
$sql = "SELECT post_id FROM wp_postmeta WHERE post_id NOT IN (SELECT DISTINCT ID FROM wp_posts)";
$res = $mysqli->query( $sql );
if ( $res->num_rows > 0 ) {
while( $row = $res->fetch_assoc( ) ) {
$post_id = $row['post_id'];
printf( "WARNING: Orphan postmeta for post: " . $post_id . "\n" );
$warnings++;
}
} else {
printf( "SUCCESS: No orphan postmeta found." . "\n" );
$successes++;
}
/* check for wp_term_relationships orphans */
$sql = "SELECT object_id FROM wp_term_relationships WHERE object_id NOT IN (SELECT ID FROM wp_posts)";
$res = $mysqli->query( $sql );
if ( $res->num_rows > 0 ) {
while( $row = $res->fetch_assoc( ) ) {
$object_id = $row['object_id'];
printf( "WARNING: Orphan term relationships for object that does not exist: " . $object_id . "\n" );
$warnings++;
}
} else {
printf( "SUCCESS: No orphan term relationships found." . "\n" );
$successes++;
}
/* check for usermeta orphans */
$sql = "SELECT user_id FROM wp_usermeta WHERE user_id NOT IN (SELECT ID from wp_users)";
$res = $mysqli->query( $sql );
if ( $res->num_rows > 0 ) {
while( $row = $res->fetch_assoc( ) ) {
$user_id = $row['user_id'];
printf( "WARNING: Orphan usermeta for user: " . $user_id . "\n" );
$warnings++;
}
} else {
printf( "SUCCESS: No orphan usermeta found." . "\n" );
$successes++;
}
/* check that gallery ids are valid */
$gallery_warnings = 0;
$sql = "SELECT ID FROM wp_posts WHERE post_content LIKE '%[gallery ids=%' AND post_status = 'publish'";
$res = $mysqli->query( $sql );
while( $row = $res->fetch_assoc( ) ) {
$post_id = $row['ID'];
// get the string of ids from out the gallery shortcode
// [gallery ids=" is 14 characters
// ]" is 2 characters
$sql2 = "SELECT SUBSTRING(post_content, LOCATE('[gallery ids=', post_content)+14, LOCATE(']', SUBSTRING(post_content, LOCATE('[gallery ids=', post_content)))-16) AS ids FROM wp_posts WHERE ID = " . $post_id;
$res2 = $mysqli->query( $sql2 );
while( $row2 = $res2->fetch_assoc( ) ) {
$ids_string = $row2['ids'];
if ( trim( $ids_string ) != '' ) {
$ids = explode( ",", $ids_string );
foreach ( $ids as $id ) {
// check wp_posts for each id in the gallery shortcode ids param
$sql3 = "SELECT COUNT(*) AS count FROM wp_posts WHERE ID = " . $id . " AND post_type = 'attachment'";
$res3 = $mysqli->query( $sql3 );
while( $row3 = $res3->fetch_assoc( ) ) {
$count = $row3['count'];
if ( $count == 0 ) {
printf( "WARNING: The gallery on post " . $post_id . " has a post attachment: " . $id . " that does not exist.\n" );
$warnings++;
$gallery_warnings++;
}
}
}
}
}
}
if ( $gallery_warnings == 0 ) {
printf( "SUCCESS: All gallery ids checked and valid attachments found." . "\n" );
$successes++;
}
printf( "Data Integrity Checks Complete.\n" . $warnings . " warnings \n" . $successes . " successes \n" );
/* close connection */
$mysqli->close();
$time_end = microtime( true );
//dividing with 60 will give the execution time in minutes otherwise seconds
$execution_time = ( $time_end - $time_start )/60;
//execution time of the script
printf("Total Execution Time: " . $execution_time . " mins \n");
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment