#!/usr/bin/perl
#######################################################################
#
# Filename: removeDupLines.pl
#
# Purpose:  Walks the weekly data directory of the specified product.
#           For every data file found (a file in which every line
#           starts with a six-digit YYYYWW stamp followed by
#           whitespace) the script:
#             - reports each week that appears more than once,
#             - reports gaps between consecutive weeks,
#             - rewrites the file in sorted order if its lines are
#               not already sorted.
#           NOTE(review): despite the file name, no line is ever
#           removed; duplicates are only reported.
#
# Date:     Jul. 02, 2002
#           Saeid Asgari-Marani
#
# Notes:
#
#######################################################################

use strict;
use warnings;

# NOTE(review): the usage text advertises a "text" argument, but only the
# first argument (the product name) is read anywhere in this script.
if (@ARGV < 1) {
    print "\n\nUsage : $0 product \n";
    print " which\n";
    print " product : product name like nms\n";
    print " text : text which duplicate lines containing that line will be removed\n\n";
    exit 1;
}

my $product = $ARGV[0];

# Products whose data lives under the cdn_metrics tree.  The lookup is an
# exact match so that partial names ("pass"), regex metacharacters or an
# empty argument are rejected instead of silently accepted.
my %is_cdn_product = map { $_ => 1 }
    qw(passport omsp 6K dpn pnp expressmanager servicemonitor mdm);

my $dataroot = "";
$dataroot = "/d/cdn_metrics/clarifyWeekly/$product" if $is_cdn_product{$product};
$dataroot = "/d/mag_metrics/clarifyWeekly/$product" if $product eq "6K";
#$dataroot = "/home/users/ironwood/TestData";

if (!$dataroot) {
    print "Invalid product name $product\n";
    exit 1;
}

FindIt($dataroot);

# main body of script

# FindIt($directory)
#
# Recursively walks $directory.  Every plain file is handed to
# _check_data_file(); every sub-directory is descended into.
# A directory that cannot be opened is reported and skipped.
sub FindIt {
    my ($directory) = @_;

    my $dh;
    if (!opendir($dh, $directory)) {
        print "Error: opening $directory: $!\n";
        return;
    }
    my @entries = grep { !/^\.\.?$/ } readdir($dh);
    closedir($dh);

    foreach my $entry (@entries) {
        my $path = "$directory/$entry";
        if (-f $path) {
            _check_data_file($path);
        }
        elsif (-d $path) {
            FindIt($path);
        }
    }
    return;
}

# _check_data_file($path)
#
# Reads $path; if it is a data file (every line begins with YYYYWW and
# whitespace), reports duplicate/missing weeks and rewrites the file in
# sorted order when the on-disk order differs from the sorted order.
sub _check_data_file {
    my ($path) = @_;

    my $in;
    if (!open($in, '<', $path)) {
        print "Error: reading $path: $!\n";
        return;
    }
    my @lines = <$in>;
    close($in);

    # We have to check all lines to see if all start with a year-week
    # stamp; only then is it a file to work on.
    foreach my $line (@lines) {
        return if $line !~ /^[0-9]{6}\s+/;
    }

    # A final line without a newline would be glued to its successor
    # once sorting moves it away from the end of the file.
    $lines[-1] .= "\n" if @lines && $lines[-1] !~ /\n\z/;

    # Lines start with a fixed-width numeric stamp, so a plain string
    # sort orders them chronologically.
    my @sorted = sort @lines;

    _report_week_problems($path, \@sorted);

    # Compare order independently of the reporting loop so that lines
    # inside a run of duplicate weeks are also taken into account.
    my $changed = grep { $sorted[$_] ne $lines[$_] } 0 .. $#lines;
    return if !$changed;

    print "\tSorting $path\n";
    my $out;
    if (!open($out, '>', $path)) {
        print "Error: writing $path: $!\n";
        return;
    }
    print {$out} @sorted;
    close($out) or print "Error: writing $path: $!\n";
    return;
}

# _report_week_problems($path, \@sorted_lines)
#
# Prints one "Duplicates" line per week that occurs more than once and
# one "Missing data" line per gap between consecutive weeks.  The file
# name is printed once, ahead of the first message for that file.
sub _report_week_problems {
    my ($path, $sorted) = @_;

    my $header = "$path : \n";    # emitted only before the first message
    my ($prev_week, $prev_year, $prev_bus_week);
    my $dup_week;                 # week already reported as duplicated

    foreach my $line (@$sorted) {
        my ($year, $bus_week) = $line =~ /^([0-9]{4})([0-9]{2})\s/;
        my $week = "$year$bus_week";

        if (defined $prev_week) {
            if ($week == $prev_week) {
                # Report each duplicated week once, however often it repeats.
                if (!defined $dup_week || $week != $dup_week) {
                    print "$header\tDuplicates $week\n";
                    $header   = "";
                    $dup_week = $week;
                }
                next;
            }

            if ($week - $prev_week != 1) {
                # Week 01 following week 52 or 53 of the previous year
                # is a normal year rollover, not a gap.
                my $is_rollover = $year - $prev_year == 1
                    && $bus_week == 1
                    && ($prev_bus_week == 52 || $prev_bus_week == 53);
                if (!$is_rollover) {
                    print "$header\tMissing data between $prev_week <-> $week\n";
                    $header = "";
                }
            }
        }

        ($prev_week, $prev_year, $prev_bus_week) = ($week, $year, $bus_week);
    }
    return;
}