#!/usr/bin/perl -w
# $Id$
# This script converts the CSV dump of Atlantis to the schueler.txt format
#
# This script might be updated after installing a sophomorix-base package.
# You must copy it by hand to its location and adjust the configuration.
#
#
# Fetch data from Atlantis the following way:
#
#   0. Atlantis starten
#   1. Adressen-Schnellsuche (Fernglas Knopf)  
#   2. Schueler: *
#      Suche Starten
#        => Anzeige fuellt sich
#   3. Exportieren CSV-Format
#
#
#   Testen:
#    1) Aufruf von 
#         sophomorix-check --filter-only
#    2) Die gefilterte Datei liegt in
#         /var/lib/sophomorix/tmp/schueler.txt.filter.tmp  
#
#
#   WARNING:
#   Die entstehende Datei exportiert auch
#    - Adressen
#    - Lehrer
#    - Betriebe
#    - alte Eintraege der Vorjahre
#   Aus Datenschutzgruenden darf die Datei nicht auf dem System bleiben.
#   Deshalb wird die gefilterte (nachdem sie getestet wurde) ueber die
#   ungefilterte kopiert:
#
#     cp /var/lib/sophomorix/tmp/schueler.txt.filter.tmp \
#        /etc/sophomorix/user/schueler.txt
#
# In case of Problems:
# jeffbeck@web.de  or  jeffbeck@gmx.de
#


############################################################
# Start configure 
############################################################

# School year whose pupils are exported.  The value is compared literally
# with the year column of the export, so it must be written the same way
# (for example 2008/09).  Must be updated each year.
my $year_to_use = "2008/09";

# Limits for the number of semicolons per input line.
#   key:   number of semicolons found in a line
#   value: maximum number of lines allowed to contain that many semicolons
# A line whose semicolon count is not listed here is reported as an error.
my %semicolons_allowed = (
    18 => 30000,
    19 => 2,
);



############################################################
# End configure 
############################################################

# Name particles that start a multi-word last name.  When the last name
# consists only of one of these words, the following word(s) of the first
# name are pulled into the last name.  Only the keys are looked up.
my %double_lastname = map { $_ => "none" } qw(van von der de De el El);

# keep these settings
# $source: file that is read and filtered
# $tmp:    file the filtered lines are written to
# $target: file the final result is copied to
my $source = "/etc/sophomorix/user/schueler.txt";
my $tmp    = "/var/lib/sophomorix/tmp/schueler.txt.filter.tmp";
my $target = "/var/lib/sophomorix/tmp/schueler.txt.tmp";

# number of semicolons in a line => number of lines seen with that count
my %semicolons_seen = ();


############################################################
# Pre-Filter
############################################################
# Copy the file unchanged when it already seems correct, i.e. when every
# line contains exactly 4 semicolons (the same number the filter below
# writes per line: class;lastname;firstname;birthday;).
my $prefilter_error=0;
open(my $prefilter_fh, "<", $source) || die "Fehler: $! $source not found!";
while (my $line = <$prefilter_fh>){
    my $semicolons_per_line = ($line =~ tr/;//);
    if ($semicolons_per_line != 4){
        $prefilter_error=1;
        # one deviating line is enough to require filtering
        last;
    }
}

close($prefilter_fh);

if ($prefilter_error==0){
    print "All Lines contain 4 semicolons\n";
    print "I consider the file correct without filtering it\n\n";
    print "Copying the file without modification\n\n";
    # List form avoids the shell; the exit status is checked so that a
    # failed copy is not reported as success.
    if (system("cp", $source, $target) != 0){
        print "Last message from filter script: ended with ERROR: ",
              "could not copy $source to $target\n";
        exit 1;
    }
    print "Last message from filter script: ended regularly\n";
    exit 0;
} else {
    print "\nPrefilter: file must be filtered!\n\n";
}

############################################################
# Filter
############################################################

# Read the raw export line by line and write one line of the form
#   class;lastname;firstname;birthday;
# to $tmp for every pupil of the configured school year.  Along the way,
# count in %semicolons_seen how many lines carry each number of semicolons;
# these counts are evaluated after the loop.
open(my $source_fh, "<", $source) || die "Fehler: $! $source not found!";
open(my $tmp_fh, ">", $tmp) || die "Fehler: $! $tmp not found!";

while (<$source_fh>){
    # bookkeeping on the raw line
    my $semicolons_per_line=tr/;//;
    $semicolons_seen{$semicolons_per_line}++;

    if (not exists $semicolons_allowed{$semicolons_per_line}){
        print "ERROR: Line with unallowed number of semicolons: ",
              "$semicolons_per_line \n";
        print "$_";
    }

    # Columns used (counting from 1):
    #   2 = record type, 4 = name, 5 = birthday, 9 = school year, 11 = class
    # All other columns are ignored.
    my (undef, $type, undef, $name, $birthday,
        undef, undef, undef, $year, undef, $class) = split(/;/, $_);

    # Short lines leave columns undefined; treat those as empty so the
    # comparisons below do not run on undef.  Then strip the quotes.
    foreach my $field ($type, $name, $birthday, $year, $class){
        $field = "" if not defined $field;
        $field = format_field($field);
    }

    # first word of the name is the last name, the remainder the first name
    my ($lastname, @firstnames) = split(" ",$name);
    my $firstname = join(" ", @firstnames);
    $lastname="" if not defined $lastname;

    # Keep only pupil records.  The record type is compared as raw bytes,
    # so accept the u-umlaut in Latin-1 (\xFC) and UTF-8 (\xC3\xBC), the
    # "ue" transliteration and the spelling without the umlaut.
    next unless $type =~ m/\ASch(?:\xFC|\xC3\xBC|ue)?ler\z/;
    next unless $year eq $year_to_use;
    # remove data with leading _ in classname
    next if $class =~ m/^_/;

    my $save="First: ".$firstname." \nLast: ".$lastname;

    # Move abbreviated parts ("xyz. ") from the front of the first name to
    # the end of the last name, one part per iteration.
    my $count=0;
    while ( $firstname=~/\./ ){
        $count++;
        # Split at the first literal ". " only.  An unescaped "." would
        # match any character before a blank and cut inside plain words.
        my ($add, $remaining)=split(/\. /, $firstname, 2);
        $add="" if not defined $add;
        $remaining="" if not defined $remaining;
        $lastname=$lastname." ".$add.".";
        $firstname=$remaining;
        if ($count==3){
            # after three rounds something is wrong with the name:
            # exit with error
            print "Last message from filter script: ended with ERROR: indefinite loop \n";
            exit 2;
        }
    }

    # correct van, von, ... stuff:
    # while the last name is only a particle, pull the next word of the
    # first name into it
    my $newlast=$lastname;
    while (exists $double_lastname{$lastname}){
        my ($last, @first) = split(" ",$firstname);
        # stop when the first name is used up instead of appending undef
        last if not defined $last;
        $firstname=join(" ",@first);
        $lastname=$last;
        $newlast=$newlast." ".$last;
    }

    if ($newlast ne $lastname){
        $lastname=$newlast;
        print "$lastname   : $firstname \n";
    }

    if ($count>1){
        print "$save \n   renamed to \nFirst: $firstname \nLast: $lastname\n";
    }
    my $line = $class.";".$lastname.";".$firstname.";".$birthday.";\n";
    print {$tmp_fh} $line;
}

close($source_fh);
# buffered write errors only show up when the handle is closed
close($tmp_fh) || die "Fehler: $! could not write $tmp!";


# Check if the file was correct: every semicolon count that was seen must
# be configured in %semicolons_allowed and must not have occurred on more
# lines than allowed there.  Only then is the filtered file copied to
# $target.
my $error=0;

print "\nFilter results:\n";
# sorted numerically so that the report has the same order on every run
foreach my $semi (sort { $a <=> $b } keys %semicolons_seen){
    my $seminum=$semicolons_seen{$semi};
    my $known=exists $semicolons_allowed{$semi};
    # an unconfigured semicolon count allows 0 lines
    my $print_semicolons_allowed=0;
    if ($known and defined $semicolons_allowed{$semi}){
        $print_semicolons_allowed=$semicolons_allowed{$semi};
    }
    printf "   I have seen %6s of allowed %6s lines with %3s semicolons: ",
      $seminum,
      $print_semicolons_allowed,
      $semi;

    # each semicolon count contributes at most one error
    if ($known and $seminum <= $print_semicolons_allowed){
        print "CORRECT!\n";
    } else {
        $error++;
        print "ERROR!\n";
    }
}
print "\n";



if ($error==0){
    # exit regularly, but only if the copy really worked
    if (system("cp", $tmp, $target) != 0){
        print "Last message from filter script: ended with ERROR: ",
              "could not copy $tmp to $target\n";
        exit 1;
    }
    #system("rm $tmp");
    print "Last message from filter script: ended regularly\n";
    exit 0;
} else {
    # exit with error
    print "Last message from filter script: ended with ERROR!\n";
    exit 1;
}





############################################################
# subs
############################################################

# Remove one double quote from the start and one from the end of a field.
#   $string: raw field text; may be undef when the column is missing
# Returns the field without the surrounding quotes; an undefined field is
# returned as the empty string, so callers can compare the result without
# "uninitialized value" warnings.
sub format_field {
    my ($string) = @_;
    return "" if not defined $string;
    $string=~s/^"//;
    $string=~s/"$//;
    return $string;
}



# Clean up a number field: drop one leading double quote and one trailing
# dot.  Returns the cleaned copy; the argument itself is not modified.
sub format_number {
    (my $cleaned = $_[0]) =~ s/^"//;
    $cleaned =~ s/\.$//;
    return $cleaned;
}

