#!/usr/bin/perl -w
# $Id: atlantis-csv-filter-schueler,v 1.5 2009-02-19 15:03:41 jeffbeck Exp $
# This script converts the CSV dump of Atlantis to the schueler.txt format
#
# This script might be updated after installing a sophomorix-base package.
# You must copy it by hand to its location and adjust the configuration.
#
#
# Fetch data from Atlantis the following way:
#
#   1. Atlantis starten
#   2. Button unter Menue: "Schueler aktuelles Schuljahr" 
#                         (Kopf mit schwarzer Brille)
#   3. Fenster:
#        Schuljahr: 2008/09 (aktuelles Schuljahr waehlen)
#        Zeugnistyp/Sammelerfassung?: -> leeres Feld  
#   4. Suchbegriff (Schueler): *
#      Button im Fenster: Suche Starten
#        => Anzeige fuellt sich
#   5. Button unter Menue: Exportieren Textformat/Excel
#         Dateiname: angeben  
#         Dateityp:   CSV
#   6. Pruefen:
#         Dateigroesse etwa 1200kB je 1000 Schueler
#
#
#   Testen:
#    1) Aufruf von 
#         sophomorix-check --filter-only
#    2) Die gefilterte Datei liegt in
#         /var/lib/sophomorix/tmp/schueler.txt.filter.tmp  
#
# In case of Problems:
# jeffbeck@web.de  or  jeffbeck@gmx.de
#


############################################################
# Start configure 
############################################################

# Whitelist of semicolon counts for the lines of the Atlantis export.
# The filter stage counts the semicolons of every data line; the script
# ends with an error unless the count of EACH line is listed here.
# key:   number of semicolons in a line
# value: maximum number of lines that may have this count
my %semicolons_allowed = qw(
    291     3600
    292     2
);


# Should the unique id (derived from the Barcode column) be written
# as an additional field of every output line?
# 0: do NOT dump
# 1: dump it!
my $dump_id=1;

# counter for ignored students (class name starts with "_");
# this is runtime state, not a setting
my $count_ignored_by_class_name=0;


############################################################
# End configure 
############################################################

# Fixed paths and bookkeeping -- keep these settings
#   $tmp:    file the filter stage writes its output to
#   $source: input file, read by the pre-filter and by the filter stage
#   $target: receives the final result (a copy of $source or of $tmp)
#   %semicolons_seen: semicolon count => number of lines seen with that count
my $tmp="/var/lib/sophomorix/tmp/schueler.txt.filter.tmp";
my $source="/etc/sophomorix/user/schueler.txt";
my $target="/var/lib/sophomorix/tmp/schueler.txt.tmp";
my %semicolons_seen=();

############################################################
# Pre-Filter
############################################################
# If every line of the source contains exactly 4 semicolons, the file is
# considered correct already: it is copied to the target without
# modification and the script ends. Otherwise execution continues with
# the filter stage.
my $prefilter_error=0;
open(my $prefilter_fh, "<", $source) || die "Fehler: $! $source not found!";
while (my $prefilter_line = <$prefilter_fh>){
    my $semicolons_per_line = ($prefilter_line =~ tr/;//);
    if ($semicolons_per_line != 4){
        $prefilter_error=1;
        #print "SEM: $semicolons_per_line\n";
        # one deviating line is enough to require filtering
        last;
    }
}

close($prefilter_fh);

if ($prefilter_error==0){
    print "All Lines contain 4 semicolons\n";
    print "I consider the file correct without filtering it\n\n";
    print "Copying the file without modification\n\n";
    # List form of system(): no shell is involved. The result is checked
    # so that a failed copy is not reported as success.
    if (system("cp", $source, $target) != 0){
        print "ERROR: Could not copy $source to $target\n";
        print "Last message from filter script: ended with ERROR!\n";
        exit 1;
    }
    print "Last message from filter script: ended regularly\n";
    exit 0;
} else {
    print "\nPrefilter: file must be filtered!\n\n";
}

############################################################
# Filter
############################################################
# Converts the Atlantis CSV export ($source) into lines of the form
#     class;lastname;firstname;birthday;[id;]
# and writes them to $tmp. The first line of the export must be the
# header line; the columns are looked up by their header name.
# While filtering, the number of semicolons of every data line is
# recorded in %semicolons_seen so that the result can be checked
# after the loop.

# An unsupported $dump_id can never produce output: report it as an
# error (non-zero exit status) before any file is touched.
if ($dump_id != 0 and $dump_id != 1){
    print "Could not dump into file\n\n";
    exit 1;
}

open(my $in_fh, "<", $source) || die "Fehler: $! $source not found!";
open(my $out_fh, ">", $tmp) || die "Fehler: $! $tmp not found!";

# %header maps in both directions: column index => column name
# and column name => column index
my %header=();
my %header_num=();
my $line_num=0;
while (my $raw = <$in_fh>){
    chomp($raw);
    # fields are separated by ";" including the surrounding quotes
    my @line=split(/";"/, $raw);
    # the last field is dropped
    pop @line;
    my $field_num=0;

    if ($line_num==0){
        # the header line is recognized by the name of its 7th column
        my $marker = defined $line[6] ? $line[6] : "";
        if ($marker eq "Klasse" or $marker eq "\"Klasse\""){
            # read the headers
            foreach my $item (@line){
                $item = format_field($item);
                $header{$field_num}="$item";
                $header{$item}="$field_num";
                $field_num++;
            }
        } else {
            print "\nWARNING: Could not read Header ($marker)\n";
        }

        # All columns that are used below must exist. Barcode is only
        # needed when the id is dumped.
        my @required_columns = ('Geb.Datum', 'Klasse', 'Familienname', 'Vorname');
        push @required_columns, 'Barcode' if $dump_id==1;
        foreach my $column (@required_columns){
            if (not exists $header{$column}){
                print "\nERROR: Could not analyze header \n\n";
                exit 1;
            }
        }

        $line_num++;
        next;
    }

    # Debugging aid: dumps all fields of one line and ends the script.
    # change the number from 10000 to the line, that you want printed
    if($line_num==10000){
#    if($line_num==10){
        print "Line ${line_num}:\n";
        foreach my $item (@line){
            $item = format_field($item);
            printf "|%4s | %-28s| %-43s|\n",$field_num,$header{$field_num},$item;
            $field_num++;
        }
        exit;
    }

    my $lastname=$line[$header{'Familienname'}];
    my $firstname=$line[$header{'Vorname'}];
    my $birthday=$line[$header{'Geb.Datum'}];
    my $class=$line[$header{'Klasse'}];
    # 13 Klassenlehrer

    # Short lines yield undefined fields: treat them as empty.
    # foreach aliases the variables, so they are modified in place.
    foreach my $field ($class, $lastname, $firstname, $birthday){
        $field = "" if not defined $field;
        $field = format_field($field);
    }

    # unid field
    # ab 14.10.2011 nicht mehr dabei (07.10.2011 war noch OK):
    # 'Externe Id', 'Externe Id Behinderungsart', 'Externe Id Gs Empfehlung'
    my $id_1="";
    if ($dump_id==1){
        $id_1=$line[$header{'Barcode'}]; # Barcode   :  ABCid
        $id_1="" if not defined $id_1;
        # extract id from barcode
        # 1 remove LETTERS
        $id_1=~s/[A-Z]//g;
        # 2 add - in the beginning
        $id_1="-".$id_1;
        $id_1 = format_field($id_1);
    }

    # count the lines per number of semicolons
    my $semicolons_per_line = ($raw =~ tr/;//);
    $semicolons_seen{$semicolons_per_line}++;

    if (not exists $semicolons_allowed{$semicolons_per_line}){
        print "ERROR: Line with unallowed number of semicolons: ",
              "$semicolons_per_line \n";
        # the line was chomped, so the newline must be added again
        print "$raw\n";
    }

    my @out_fields = ($class, $lastname, $firstname, $birthday);
    push @out_fields, $id_1 if $dump_id==1;
    my $out_line = join(";", @out_fields).";\n";

    # ignore classes beginning with _
    if (not $class=~/^\_/){
        print {$out_fh} "$out_line";
    } else {
        print "Ignoring $out_line";
        $count_ignored_by_class_name++;
    }

    $line_num++;
}

close($in_fh);
# buffered write errors only show up when the file is closed
close($out_fh) || die "Fehler: $! could not write $tmp";
print "Filtered data written to $tmp\n";


# check if file was correct
# For every number of semicolons that was seen, the number of lines with
# that count is compared to %semicolons_allowed. Only if all counts are
# allowed and within their limit, the filtered file is copied to the target.
my $error=0;

print "\nIgnored by class name: ($count_ignored_by_class_name)\n\n";

# sorted, so that the report has the same order in every run
foreach my $semi (sort { $a <=> $b } keys %semicolons_seen){
    my $seminum=$semicolons_seen{$semi};
    my $line_err=0;
    # a count that is not configured is reported as "0 allowed"
    my $print_semicolons_allowed=$semicolons_allowed{$semi};
    if(not defined $print_semicolons_allowed){
        $print_semicolons_allowed=0;
    }
    printf "I have seen %6s of allowed %6s lines with %3s semicolons: ",
      $seminum,
      $print_semicolons_allowed,
      $semi;

    # this number of semicolons is not allowed at all
    if (not exists $semicolons_allowed{$semi}){
        $error++;
        $line_err=1;
    }

    # more lines with this number of semicolons than allowed
    if ($seminum > $print_semicolons_allowed){
        $error++;
        $line_err=1;
    }

    if ($line_err==0){
        print "CORRECT!\n";
    } else {
        print "ERROR!\n";
    }
}
print "\n";



if ($error==0){
    # List form of system(): no shell is involved. The result is checked
    # so that a failed copy is not reported as success.
    if (system("cp", $tmp, $target) != 0){
        print "ERROR: Could not copy $tmp to $target\n";
        print "Last message from filter script: ended with ERROR!\n";
        exit 1;
    }
    #system("rm $tmp");
    # exit regularly
    print "Last message from filter script: ended regularly\n";
    exit 0;
} else {
    # exit with error
    print "Last message from filter script: ended with ERROR!\n";
    exit 1;
}





############################################################
# subs
############################################################

# format_field($string)
# Cleans up one field of the export: removes at most ONE leading and
# at most ONE trailing space. Everything else is left untouched
# (removing the surrounding quotes is disabled, see below).
# An undefined field (e.g. taken from a line that is too short) is
# returned as the empty string.
# Returns the cleaned up string.
sub format_field {
    my ($string) = @_;
    # avoid "uninitialized value" warnings and undef results
    return "" if not defined $string;
#    $string=~s/^"//;
#    $string=~s/"$//;
    $string=~s/^ //;
    $string=~s/ $//;
    return $string;
}


