HI I have a master spreadsheet with 60 columns, which is updated with new data from an updates file. The new data can be whole new lines or just cells. luckily i came across your site and found an excellent script which does exactly what i want, the only problem i have, is in my case i reference column 'B' in my master file to see if any new data fields need updating, where as in the script on your site the reference ($username) is in column A. the problem i have is when i output the new file i cannot get the columns back in order, ie the column 'B' is always the 1st column, i can't get it back in the original order. column A column B etc. as you will see from my script i can only get output column B A B C D etc.. Could someone advice me where i am going wrong.
Thanks in anticipation.

#!/usr/bin/perl
use strict;
use warnings;
#use diagnostics;
use 5.10.0;    # use for smart matching '~~'
use Tk::DirTree;
use Cwd;
use Spreadsheet::BasicRead;
    my $fhmaster = "C:\\Temp\\NCPS_Data\\school_data.xlsx";
                my $fhmastercsv = "C:\\Temp\\NCPS_Data\\Mre_temp\\master_file.csv";

        open( OUTPUT1, '>', "$fhmastercsv" ) or die "unable to open $fhmastercsv !\n Does it Exist ?\n";    

                my $fhupdatescsv = "C:\\Temp\\NCPS_Data\\Mre_temp\\upload_file.csv";
        open( OUTPUT2, '>', "$fhupdatescsv" ) or die "unable to open $fhupdatescsv !\n Does it Exist ?\n";      
#====================== master file =====================================================================   

my @csvmaster;
        if ( -e $fhmaster ) {
my $ss   = new Spreadsheet::BasicRead($fhmaster) or die;
    my $col = '';
    my $row  = 0;
    while ( my $data = $ss->getNextRow() ) {
        $row++;
        $col= join( "\t", @$data );
            push @csvmaster,  $col . "\n" if ( $col ne "" );
    }
}   

    my @arrymstr;   
my %seenmstr;
  my $Header_mstr = shift (@csvmaster);  

  foreach (@csvmaster){
chomp (@csvmaster);
                $_ = uc $_ ;
                 $_ =~ s/,//g;                                      #removes commas
                $_ =~ s/\t/,/g;                                 #change from tab to  csv
            print OUTPUT1 $_ . "\n"  if !$seenmstr{$_}++ ;    #remove any dupes
}   
#===================== update file ======================================================================
my $bmw = new MainWindow;
$bmw->withdraw;
$bmw -> focusForce;
my $ok = 1;
my $logo = "C:\\perl\\Mre\\Images\\tk.bmp";    # 32x32 GIF or BMP
my $icon = $bmw->Photo(-file => $logo);
$bmw->idletasks;       
$bmw->iconimage($icon);

my $f = $bmw->Frame->pack();
  my @types =
       (["All files",        '*'],["Log files", [qw/.txt .log/]]
       );
  my $fhupdates = $bmw->getOpenFile();
  $fhupdates ="" if(!$fhupdates);
  $fhupdates =~ s/\s+$//;
  $fhupdates =~ s/\//\\\\/ig;

$f->Button(-text => 'Ok',
           -command => sub { $ok = 1 })->pack(-side => 'left');

$f->Button(-text => 'Cancel',
           -command => [\&Cancel_buttons] )->pack(-side => 'left');
#--------------------------------------------------------------------------------------------------------------
my @csvupdate;
        if ( -e $fhupdates ) {
my $ss   = new Spreadsheet::BasicRead($fhupdates) or die;
    my $col = '';
    my $row  = 0;
    while ( my $data = $ss->getNextRow() ) {
        $row++;
        $col= join( "\t", @$data );

            push @csvupdate,  $col . "\n" if ( $col ne "" );
    }
}   

my @arryupdte;  
my %seenupdte;
my  $Header_updte = shift (@csvupdate);  
  foreach (@csvupdate){
chomp(@csvupdate);
     $_ = uc $_ ;
     $_ =~ s/,//g;                                      #removes commas
     $_ =~ s/\t/,/g;                                    #change from tab to  csv
             print OUTPUT2  $_ . "\n"  if !$seenupdte{$_}++ ;    #remove any dupes
}   

close OUTPUT1;
close OUTPUT2;

# compare both files and update master with an new data

    our $user_and_attr_ref = {};    # intialize hash reference
    open my $fh, '<', "C:\\Temp\\NCPS_Data\\Mre_temp\\master_file.csv" or die "can't open file Mre_temp\\master_file.csv ";
    foreach (<$fh>) {
        chomp;
        my ( $classnme ,$childnme, $gender, $classyr ) = split /,/, $_, 4;
            $classnme="" if(!$classnme);
            $childnme="" if(!$childnme);
            $gender="" if(!$gender);
            $classyr="" if(!$classyr);
        # use username as a key in the hash of Array
        push @{ $user_and_attr_ref->{$childnme} }, qq{$classnme,$childnme, $gender,$classyr};
    }
    close $fh or die "can't close file: Mre_temp\\master_file.csv";
    my $file = "C:\\Temp\\NCPS_Data\\Mre_temp\\upload_file.csv";
    #return two files, using a subroutine file_outputs

     my ( $output_file1, $output_file2 ) = file_outputs($file);
    #using an anonymous hash reference in a subroutine write_out_new_file
    # twice, file variable, filename and a subroutine reference was passed
    write_out_new_file(
        {
            file     => $output_file1,
            filename => "C:\\Temp\\NCPS_Data\\Mre_temp\\new_file.csv",
            code     => \&check_file_to_write,
        }
    );
  # my ( $output_file2 ) = file_outputs($file);
    write_out_new_file(
        {
            file     => $output_file2,
            filename => "C:\\Temp\\NCPS_Data\\Mre_temp\\changed_file.csv",
            code     => sub { return $_[0]; },
        }
    );
    sub file_outputs {
        my ($filename) = @_;
        my ( $new_master_file_str, $update_file_str ) = ( q{}, q{} ); # q{} means ''
        open my $fh, '<', $filename or die "can't open file $filename ";
        foreach (<$fh>) {
            chomp;
            my ( $classnme, $childnme, $gender, $classyr ) = split /,/, $_, 4;
            $classnme="" if(!$classnme);# I do this so warnings will not complain
            $childnme="" if(!$childnme);
            $gender="" if(!$gender);
            $classyr="" if(!$classyr);
            $_ = uc $_ for $classnme, $childnme, $gender;
                    $_ ||= '' for $classnme, $childnme, $gender;
            if ( exists $user_and_attr_ref->{$childnme} ) {
                my @user_data_in_upload_file = qq{$classnme, $childnme, $gender,$classyr};
                my @user_data_in_master_file =
                  grep { $_ } values @{ $user_and_attr_ref->{$childnme} };

                if ( @user_data_in_upload_file ~~ @user_data_in_master_file ) {
                    $new_master_file_str .= qq{$classnme,$childnme,$gender,$classyr} . $/;
                }  
            }
            else {
                    $update_file_str .= 
                    qq{$classnme,$childnme,$gender,$classyr,NEW} . $/;
                    $new_master_file_str .= 
                    qq{$classnme,$childnme,$gender,$classyr} . $/;
            }
        }
        return $new_master_file_str, $update_file_str if wantarray;
    }

    sub write_out_new_file {
        my ($file) = @_;
        open my $fh, '>', $file->{'filename'} or die "can't open file \n Does it Exist !\n";
        print {$fh} $file->{'code'}->( $file->{'file'} );
    }
    sub check_file_to_write {
        my $new_user_and_attr_ref = {};    #initialize a hash reference
        foreach ( split "\n", $_[0] ) {
            my ( $classnme, $childnme, $gender, $classyr ) = split /,/, $_, 4;
            push @{ $new_user_and_attr_ref->{$childnme} }, qq{$classnme, $childnme, $gender,$classyr};

        }
        my @key = sort keys %$user_and_attr_ref;
        for (@key) {
            if ( !exists $new_user_and_attr_ref->{$_} ) {
                push @{ $new_user_and_attr_ref->{$_} },
                  @{ $user_and_attr_ref->{$_} }, $/;
            }
        }
        my $file = q{};
        foreach my $sorted_data ( sort keys %$new_user_and_attr_ref ) {
            $file .= sprintf "%s,%s\n", $sorted_data,
              @{ $new_user_and_attr_ref->{$sorted_data} };

        }
        return $file;
    }
close $file;

Hi johntim,

the problem i have is when i output the new file i cannot get the columns back in order, ie the column 'B' is always the 1st column

The reason is because, you might still have being outputing your final result in the same order the script you found and you used is doing it own. In other words, check the order by which the script you found is doing it. And change it to align with what you wanted.

Secondly, use a scoped file-handler i.e instead of open OUTPUT1... use open my $fh ....

Try not to hardcore your filenames and directory into your program to make it potable and reuseable.

Lastly, you didn't show us an example of the file you are either inputting or outputting, at least that help to visualizied what you wanted.

Hope this helps.

Edited 8 Months Ago by 2teez

master file example

Class,pupil,M/F,Year,
LEOPARDS,AMIE CAHORT,,2,
ZEBRAS,JIM LEE,,3,
MEERKATS,JAMES LEHERN,M,,   

update file example

Class,Pupil,M/F,Year,
ZEBRAS,JIM LEE,M,3,
MEERKATS,DAN DARE,M,, 
MONKEYS,JOHN JONES,M,5,
HIPPOS,A PELE,M,6,
MEERKATS,JAMES LEHERN,M,4,

expected output

Class,Pupil,M/F,year,
LEOPARDS,AMIE CAHORT,,2,
ZEBRAS, JIM LEE,M,3,
MEERKATS,JAMES LEHERN,M,4,
MEERKATS,DAN DARE,M,,
MONKEYS,JOHN JONES,M,5,
HIPPOS,A PELE,M,6,

Hi,

What you can do even more simply is using an hash to get the values from the master file, then open and parse through the updated file, a row at a time, then check to see if any row matched with the existing data in the master file hash. If yes, update it with the present value from the update file, if no add it to the master hash data. The print your result.

Using the data you presented here the code below does what you want. But the modules I used are not CORE modules in Perl, so you might have to install them from CPAN, that is if you don't already have them installed.

use warnings;
use strict;

use Text::CSV;
use Inline::Files;
use Data::Dump;

my $data = {};

parse_data(
    {
        file => \*FILE1,
        code => sub {
            my $value = shift;    # scope variable for each row
            push @{ $data->{ join '' => $value->[0], $value->[1] } } => @$value;
        }
    }
);

print parse_data(
    {
        file => \*FILE2,
        code => sub {
            my $value = shift;    # scope variable for each row
            my $key_to_check = join '' => $value->[0], $value->[1];
            if ( exists $data->{$key_to_check} ) {
                delete $data->{$key_to_check};  # update existing data
                push @{ $data->{ join '' => $value->[0], $value->[1] } } =>
                  @$value;
            }
            else { # add data
                push @{ $data->{ join '' => $value->[0], $value->[1] } } =>
                  @$value;
            }
        }
    }
);

dd values %{$data};  # print out the final output

sub parse_data {
    my $csv = Text::CSV->new( { binary => 1 } )
      or die "can't open file" . Text::CSV->error_diag();
    my $value = shift;
    my $fh    = *{ $value->{file} };

    # take out the header
    my $header = <$fh>;
    while ( my $row = $csv->getline($fh) ) {
        next if $row->[0] eq '';
        $value->{code}->($row);  ## a call to anonymous subroutine
    }

    return $header;
}

__FILE1__
Class, pupil, M / F, Year,
LEOPARDS, AMIE CAHORT,, 2,
ZEBRAS,   JIM LEE,,     3,
MEERKATS, JAMES LEHERN, M,,

__FILE2__
Class, Pupil, M / F, Year,
ZEBRAS, JIM LEE, M, 3,
MEERKATS, DAN DARE,     M,,
MONKEYS,  JOHN JONES,   M, 5,
HIPPOS,   A PELE,       M, 6,
MEERKATS, JAMES LEHERN, M, 4,

The output is like so:

Class, Pupil, M / F, Year,
(
  ["ZEBRAS", " JIM LEE", " M", " 3", ""],
  ["MEERKATS", " DAN DARE", "     M", "", ""],
  ["ZEBRAS", "   JIM LEE", "", "     3", ""],
  ["MEERKATS", " JAMES LEHERN", " M", " 4", ""],
  ["LEOPARDS", " AMIE CAHORT", "", " 2", ""],
  ["HIPPOS", "   A PELE", "       M", " 6", ""],
  ["MONKEYS", "  JOHN JONES", "   M", " 5", ""],
)

NOTE: Need I say that what you have is a CSV file. And you should you a moudle that properly does that for you, instead of handpicking the values yourself.

Hope this helps.

Hello,
You might have noticed that if the master file and the updated file data have several spaces in the values, they might be occur more than once. So, you must find a way of removing the spaces around the values before parsing them in the hash variable to have uniform data to work with.
The code I provided work well, but I left out two things for the OP to figure out namely:
1. remove the spaces on each value in the row so that the like of

["ZEBRAS", " JIM LEE", "", " 3", ""],

and

["ZEBRAS", " JIM LEE", " M", " 3", ""], are seen as one and updated accordingly.

  1. Output the keys of final hash variable to give the desired output.

    Class, Pupil, M / F, Year,
    (
    ["LEOPARDS", "AMIE CAHORT", "", 2, ""],
    ["MEERKATS", "JAMES LEHERN", "M", 4, ""],
    ["ZEBRAS", "JIM LEE", "M", 3, ""],
    ["MEERKATS", "DAN DARE", "M", "", ""],
    ["HIPPOS", "A PELE", "M", 6, ""],
    ["MONKEYS", "JOHN JONES", "M", 5, ""],
    )

Hope this helps

Edited 8 Months Ago by 2teez

This question has already been answered. Start a new discussion instead.