#!/usr/bin/perl
use strict;
use warnings;

#use Text::CSV;
use DBI;
use Scalar::Util qw(looks_like_number);
use List::Util qw(min);
use List::Util qw(max);
use List::Util qw(any);
use List::MoreUtils qw(uniq);

use Date::Calc qw(Add_Delta_YM);
use DateTime;
use DateTime::Duration;

use Statistics::LineFit;

#
#
#

my $debug = 0;

#
# Statistical processing for noisy data
#
my $adj_confidence = 0;        # Adjust trendline slope according to R2 confidence
my $smooth_baseline = 1;       # Smooth the deaths in the trend baseline
my $smooth_auto = 2;           # Automaticaly smooth the raw data in certain demographics (SCHOOL, CHILDREN), certain diseases (influenza) and if the pop is small 0 = Never.  1 = Auto   2 = All

## Noise reduction parameters
#

my $signal_proc_raw = "raw";  # No signal processing
#my $signal_proc_cooked = "ma";  # Can be "ma" or "loess" currently
my $signal_proc_cooked = "loess";  # Can be "ma" (moving average) or "loess" (my loess algorithm)

#
# Moving Average Smoother parameters
#
my $smooth_window_trends = 2;  # Size of the moving average window for smoothing trends
                               #(na if $smooth_baseline is 0 or the base death data has already been smoothed
my $smooth_window_deaths = 2;  # Size of the smoothing window to apply to the base death data

#
# Loess Smoother parameters
my $loess_span = 5; # Adjust this value to change the span of smoothing
my $loess_degree = 1; # Adjust this value to change the degree of smoothing
my $loess_span_trends = 3; # Adjust this value to change the span of smoothing
my $loess_degree_trends = 1; # Adjust this value to change the degree of smoothing
#
####################################

#

#
# Allocate 999s and C99s
#
my $allocate_99s = 1;

#
# Basic reporting lag -- what percentage of death certificates are received from the states to the CDC
#                        my month of report.  I.e. deaths that are only 1 month old are 50% complete.  2 months are 70% etc.
#

my $est_window = 18;  # Mortality numbers within 18 months are flagged as estimates

my @reporting_lag = (0.50,0.70,0.80,0.85,0.90,0.95);  # Beyond end of reporting_lag assumed to be 100% reported

#
# Trend points.  These set the minimium number of trend points necessary to establish a trend
# As well as a warning threshold
#

my $minTrendPoints = 3;
my $minTrendPointsWARN = 4;


#
# Main internal data structures.  Death data is read from disk (mysql database)
# and then stored here in these hashes.  Each hash has several dimensions
#
#
my %deaths;
my %population;

#  DEMO START
#  Demographics (age ranges only at this point)
#  These hashes map age demographic codes as they come from WONDER (@age_codes) into normalized
#  codes that are used internally (@display_codes)
#

my @display_codes = ("< 1","1-4","0-17","5-14","5-17","15-24","18-24","25-34","35-44","45-54","55-64","65-74","75-84","85+","ALL AGES");
my @nondistinct_codes = ("< 1","1-4","5-14","5-17","15-24","ALL AGES");
my @age_codes = ("< 1 year","1-4 years","0-17 years","5-14 years","5-17 years","15-24 years","18-24 years","25-34 years","35-44 years","45-54 years",
                 "55-64 years","65-74 years","75-84 years","85+ years","ALL AGES");

my %code_map;
my %code_map_r;

@code_map{@display_codes} = @age_codes;
@code_map_r{@age_codes} = @display_codes;

#
# Internal demographic categories
# ALL -- All ages
# OAF -- OAF (Old And Frail or Old As Fuck) 75+ years
# MAL -- MAL (Middle Age Living or Middle Age Loser) 45-74 years
# YAH -- YAH (Young And Healthy or Young And Horrible)) 18-44 years
# SCHOOL -- SCHOOL aged children.  5-17 years
# CHILD -- Children.  0-17 years
#

my @ALLDemo = all_distinct_ages();

my @CHILDDemo = ("0-17");
my @SCHOOLDemo = ("5-17");
my @YAHDemo = ("18-24","25-34","35-44");
my @MALDemo = ("45-54","55-64","65-74");
my @OAFDemo = ("75-84","85+");


#
# First two characters are sort order for output and not returned by demoNames
#

my %DEMOSets = ("01SCHOOL" => \@SCHOOLDemo,
		"02CHILD" => \@CHILDDemo,
		"03YAH" => \@YAHDemo,
		"04MAL" => \@MALDemo,
                "05OAF" => \@OAFDemo,
                "99ALL" => \@ALLDemo);


#
#
#
my $dsn = "DBI:mysql:PHI_DEATH";
my $username = "greg";
my $password = "sh33th3ad";

my %db_attr = (PrintError=>0,RaiseError=>1);
my $dbh = DBI->connect($dsn,$username,$password,\%db_attr);

#my $csv = Text::CSV->new({
    #sep_char => ',',
#    sep_char => "\t",
    
#    binary => 1,
#   auto_diag => 1
#});
#

sub demoNames {
    my @names;

    for my $name (keys %DEMOSets) {
	push(@names,$name);
    }

    @names = sort(@names);

    s/^\d\d// for (@names);
    
    return(@names);
}


sub demoSet {
    my ($demo) = @_;
    my @set;
    my $demomatch;

    die "NO DEMO IN demoSet" if(not defined($demo));
    for my $demoset (keys %DEMOSets) {
	$demomatch = $demoset;

	$demoset =~ s/^\d\d//;
	
	last if($demo eq $demoset);
	
	undef $demomatch;
    }

    die "No demoSet named $demo" if(not defined($demomatch));
    my $demoSets = $DEMOSets{$demomatch};

    @set = @{$demoSets};

    return(@set);
}

sub all_distinct_ages {
    my @ages;

    for my $display_code (@display_codes) {
        if (! ( grep( /^$display_code$/, @nondistinct_codes))) {
            push(@ages,$display_code);
	}
    }

    if($debug > 2) {
        foreach my $distinct_age (@ages) {
            print "Distinct Age: $distinct_age\n";
	}
    }
    return(@ages);
}

#
# DEMO END

#
#
#
my @monthnames = ("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec","YEAR");
my @monthseason = (6,6,6,2,2,2,5,5,5,4,4,4,8);


my $be = 2019;     # end of base period
my $nbyears = 5;   # Default base span.  Should be odd

#
#  Names of all the external and internal tables
#  External tables are read from Mysql into the hashes mentioned above
#
# Real tables come from the database itself
# Synthetic tables are constructed internally from real tables -- usually by merging two or more tables into one synthetic
#                                                                 or by subtracting a real table from another real table
#                                                                 (example subtract COVID deaths from deaths from disease to
#                                                                 get deaths from all diseases OTHER than COVID
# Canttrend_tables are tables for which we cannot compute a trend.  Usually because they are a disease that did not exist in
#                                                                   the baseline years (e.g. COVID)
#

my %real_tables;        # Tables read out of the database
my %canttrend_tables;   # Tables read out of the database that can't be trended
my %synthetic_tables;   # Tables we make here (i.e. difference tables)

if(1==1) {
    %canttrend_tables = ("01deaths_covid_mcd" => 2,
			 "90deaths_999_ucd" => 0);
    
    %real_tables = ("00deaths_all" => 1,     # Should be first as it is used to fill in missing pop info for other tables
		    "01deaths_influenza_mcd" => 2,
		    "01deaths_disease_mcd" => 2,
		    "01deaths_disease_ucd" => 2,
		    "01deaths_nondisease_ucd" => 2,
		    "02deaths_cancer_ucd" => 2,
		    "02deaths_circulatory_ucd" => 2,
		    "02deaths_respiratory_ucd" => 2,
		    "02deaths_infectparasite_ucd" => 2,
		    "02deaths_blood_ucd" => 2,
		    "02deaths_drugalcoholod_ucd" => 2,
		    "02deaths_drugod_ucd" => 2,
		    "02deaths_alcoholod_ucd" => 2,
		    "02deaths_suicide_ucd" => 2,
		    "90deaths_r99_ucd",0);
    
    %synthetic_tables = ("99deaths_nondisease_mdrugalcohol" => 4,
			 "99deaths_all_mdrugalcohol" => 4,
			 "99deaths_disease_mcovid" => 2);
} else {
    %real_tables = ("01deaths_disease_ucd" => 2,
		    "01deaths_nondisease_ucd" => 2,
		    "90deaths_r99_ucd",0);

    %canttrend_tables = ("01deaths_covid_mcd" => 2,
			 "90deaths_999_ucd" => 0);
    
}


#if(($#ARGV < 1) || ($#ARGV > 4)) {
#   die "usage: compute_trend.pl [--shellvar|  [location] [baseline year end] [number of years comprising baseline]]\n";
#}

#
# Location (i.e. USA or individual state)
#
my $ofile = "USA";

my $syear = $ARGV[0];

sub tablenames {
    my %hash = @_;

    my @tkeys = sort keys(%hash);

    for (@tkeys) { s/^\d\d// };

    return(@tkeys);
}

if($syear eq "--shellvar") {
    print "REAL_TABLES=\"";
    my $d1 = 0;
    foreach my $t (tablenames(%real_tables)) {
	if($d1 > 0) {
	    print " ";
	}
	$d1 = 1;
	print "$t";
    }
    print "\"\n";
	
    print "CANTTREND_TABLES=\"";
    $d1 = 0;
    foreach my $t (tablenames(%canttrend_tables)) {
	if($d1 > 0) {
            print " ";
	}
        $d1 = 1;
	print "$t";
    }
    print "\"\n";
	
    print "SYNTHETIC_TABLES=\"";
    $d1 = 0;
    foreach my $t (tablenames(%synthetic_tables)) {
	if($d1 > 0) {
            print " ";
	}
        $d1 = 1;
	print "$t";
    }
    print "\"\n";
    exit;
}

my $eyear = $ARGV[1];
if($#ARGV >= 2) {
    $ofile = $ARGV[2];
}

my $location = $ofile;
$location =~ s/_/ /g;


#
# Baseline setup
#

if($#ARGV >= 3) {
    $be = $ARGV[3];
}

if($#ARGV >= 4) {
    $nbyears = $ARGV[4];
}

my $bs = $be - ($nbyears-1);

my $launchyr = $bs;

if(($syear < 1999) || ($syear > 2024) || ($eyear < 1999) || ($eyear > 2024) || ($eyear < $syear)) {
    die "years must be between 1999 and 2024 and start year must be less than or equal to end year";
}

#
#  Get months
#  Return the number of months between the current date and a given year and month
#

sub get_months_ago {
    my ($target_year,$target_month) = @_;

    if($target_month == 13) {
	$target_month = 1;
    }
    
    my $current_date = DateTime->now;

    my $target_dt = DateTime->new(
	year   => $target_year,
	month  => $target_month,
	day    => 1,  # Set day to 1 to represent the 1st day of the target month
	);


    #    my $ddifference = $current_date->subtract_datetime($target_dt);
    my $ddifference = $current_date - $target_dt;

    my $md = $ddifference->in_units('months');

    return($md);
}

#

#
#   Merge 99s
#
#   This routine attempts to control for some of the reporting lag by pre-emptively merging both 999 reports and
#   ICD-10 R99 category death certificates (as indicated on the underlying cause of death)
#   It allocates them to either the category of natural death (i.e. death from disease) or
#   to unnatural death (accidents, homicides, etc.) by perentage according to the age of the R99s/999s
#
#

sub allocate_99s {
    my ($receiver_table, $receiver_table_type) = @_;
    
    my @c999schedule = (99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99);          # 999 should not extend beyond six months
    my @cR99schedule = (85, 80, 75, 65, 50, 30, 20, 10, 0, 0, 0, 0, 0, 0, 0, 0);   # R99 are 100% allocated to disease beyond 7 months
    
    my $t1 = "deaths_999_ucd";
    my $t2 = "deaths_r99_ucd";
	    

    #
    # First let's do the 999s
    #
    
    foreach my $year (sort { $a <=> $b } keys %{$deaths{$receiver_table}}) {
	foreach my $month (sort { $a <=> $b } keys %{$deaths{$receiver_table}{$year}}) {
	    my $avg_c999 = 0;
	    my $avg_cR99 = 0;
	    
	    if($month == 13) {

		foreach my $val (@c999schedule) {
		    $avg_c999 = $avg_c999 + $val;
		}

		$avg_c999 = $avg_c999 / scalar(@c999schedule);
		
		foreach my $val (@cR99schedule) {
		    $avg_cR99 = $avg_cR99 + $val;
		}
	    
		$avg_cR99 = $avg_cR99 / scalar(@cR99schedule);
	    }
	    
	    foreach my $age_code (sort keys %{$deaths{$receiver_table}{$year}{$month}}) {

		next if($age_code =~ /Not Stated/);
		
		my $t999deaths;
		my $t999pop;
		if($year >= minyear($t1)) {
		    $t999deaths  = $deaths{$t1}{$year}{$month}{$age_code}{$signal_proc_raw};
		    $t999pop = $population{$t1}{$year}{$month}{$age_code};
		}
		
		my $tc99deaths;
		my $tc99pop;
		if($year >= minyear($t2)) {
		    $tc99deaths = $deaths{$t2}{$year}{$month}{$age_code}{$signal_proc_raw};
		    $tc99pop = $population{$t2}{$year}{$month}{$age_code};
		}

		my $multipliert999;
		my $multipliertc99;

		my $months_ago;

		if($month == 13) {
		    $months_ago = get_months_ago($year,1);
		    if($months_ago >= 24) {
			$multipliert999 = 100;
			$multipliertc99 = 0;
		    } else {
			$multipliert999 = $avg_c999;
			$multipliertc99 = $avg_cR99;
		    }
		} else {
		    $months_ago = get_months_ago($year,$month);
		    $multipliert999 = $c999schedule[min($months_ago,$#c999schedule)];
		    $multipliertc99 = $cR99schedule[min($months_ago,$#cR99schedule)];
		}

		if($receiver_table_type eq "DISEASE") {
		    $multipliert999 = 100-$multipliert999;
		    $multipliertc99 = 100-$multipliertc99;
		}

		my $rdeaths = $deaths{$receiver_table}{$year}{$month}{$age_code}{$signal_proc_raw};
		my $rpop = $population{$receiver_table}{$year}{$month}{$age_code};
		
		if(not defined $rpop) {
		    die "rpop not defined for $receiver_table year $year month $month age code $age_code.  Rdeaths is $rdeaths";
		}

		if($debug > 1) { 
		    if(($receiver_table eq "deaths_disease_ucd") && ($year == 2022) && ($month == 13) && ($age_code =~ /0-17 years/)) {
			print "$receiver_table Year $year Month $month age code $age_code deaths $rdeaths";
		    }
		}
		
		if(defined $t999deaths) {

		    die "allocate_99s Receiver $receiver_table population ne $t1 population ($rpop/$t999pop)" if($rpop != $t999pop);

		    my $givto = int(($t999deaths * ($multipliert999/100)));
		    if($debug > 1) { 
			if(($receiver_table eq "deaths_disease_ucd") && ($year == 2022) && ($month == 13) && ($age_code =~ /0-17 years/)) {
			    print " adding $givto (out of $t999deaths) from 999";
			}
		    }
		
		    if($givto > 0) {
			$rdeaths = $rdeaths + $givto;
		    }
		}
		
		if(defined $tc99deaths) {

		    die "allocate_99s Receiver $receiver_table population ne $t2 population ($rpop/$tc99pop)" if($rpop != $tc99pop);
		    
		    my $givto = int(($tc99deaths * ($multipliertc99/100)));
		    if($debug > 1) { 
			if(($receiver_table eq "deaths_disease_ucd") && ($year == 2022) && ($month == 13) && ($age_code =~ /0-17 years/)) {
			    print " adding $givto (out of $tc99deaths) from c99";
			}
		    }
		    if($givto > 0) {
			$rdeaths = $rdeaths + $givto;
		    }
		}

		if(defined($rdeaths)) {
		    if($debug > 1) { 
			if(($receiver_table eq "deaths_disease_ucd") && ($year == 2022) && ($month == 13) && ($age_code =~ /0-17 years/)) {
			    print " total $rdeaths\n";
			}
		    }
		    $deaths{$receiver_table}{$year}{$month}{$age_code}{$signal_proc_raw} = $rdeaths;
		}
	    }
	}
    }
}

sub greg_max {
    my ($a,$b) = @_;

    if($a >= $b) {
	return($a);
    }

    return($b);
}

sub greg_min {
    my ($a,$b) = @_;

    if($a < $b) {
	return($a);
    }

    return($b);
}

sub greg_uniq {
    my @ilist = sort @_;
    my @olist;

    my $oname = "";
    
    foreach my $name (@ilist) {
	if($name ne $oname) {
	    push(@olist,$name);
	}
	$oname = $name;
    }

    return(@olist);
}

#
#
#
sub distinct_age {
    my ($age_code) = @_;

    if($age_code =~ /ALL AGES/) {
	return(0);
    }
    
    foreach my $check_code (@nondistinct_codes) {
	if($code_map{$check_code} eq $age_code) {
	    return(0);
	}
    }

    return(1);
}
	
#
#
#
sub nicemonth {
    my ($year,$month) = @_;
    my $monthname = $monthnames[$month-1];

    $monthname =~ s/^(.).*/$1/;
    my $niceout = "\"$monthname ";
    if(($month == 2)) {
	$niceout .= "\\n$year";
    } else {
	if(($month > 3) && ($month < 9999)) {
	    $niceout .= "\\n -- ";
	}
    }

    $niceout .= "\"";
    return($niceout);
}

#
#
#
sub table_is_canttrend_table {
    my ($table) = @_;

    foreach my $ctable (tablenames(%canttrend_tables)) {
	if($table =~ /$ctable/) {
	    return(1);
	}
    }

    return(0);
}

sub insert_into_db {
    my ($date,$location,$pop,$demo, $table,$month,$per100K,$eper100K,$rmdeaths,$mdeaths,$medeaths,$season) = @_;

    my $sql = "INSERT INTO EXCESS VALUES (?,?,?,?,?,?,?,?,?,?,?,?);";

    my $stmt = $dbh->prepare($sql);

    print "INSERTING INTO EXCESS $date/$location/$pop/$demo/$table/$month/$rmdeaths/$mdeaths/$medeaths/\n" if($debug > 1);
    $stmt->execute($date, $location, $pop, $demo, $table, $month, $per100K, $eper100K, $rmdeaths, $mdeaths, $medeaths, $season);

    $stmt->finish();
}

sub signal_process {
    my ($table,$year,$month,$demo) = @_;

    if($smooth_auto == 0) {
	return($signal_proc_raw);
    }
    if($smooth_auto == 2) {
	return($signal_proc_cooked);
    }

    # Auto Determine
    
    if($table =~ /_covid_/) {
	return($signal_proc_raw);
    }
    
    if($demo =~ /CHILD|SCHOOL/) {
	return($signal_proc_cooked);
    }

    my $demo_pop = pop_in_demo($table,$year,$month,$demo);

    if(defined($demo_pop)) {
	
	if(($demo eq "OAF") && ($demo_pop >= 2000000)) {
	    return($signal_proc_raw);
	}
	if(($demo eq "MAL") && ($demo_pop >= 6000000)) {
	    return($signal_proc_raw);
	}
	if(($demo eq "YAH") && ($demo_pop >= 10000000)) {
	    return($signal_proc_raw);
	}
    }

    return($signal_proc_cooked);
}

    
	
sub display_ages_for_sql {
    my @ages = @_;
    my $sql;

    my $first = 0;
    for my $display_code (@ages) {
        if($first > 0) {
            $sql .= ","
        }
        $first = 1;
        if($display_code ne "ALL AGES") {
            $sql .= "\"$code_map{$display_code}\"";
        }
    }

    return($sql);
}


#
#
#  Read the base data in
#
#

sub read_data {
    my ($table,$shortstop) = @_;
    
    my $sql = "select \"$location\",year(date_taken),month(date_taken), age_code,round(sum(population),0),sum(deaths_actual)\nfrom ${table}_month\nwhere year(date_taken) <= $eyear";

    if($location ne "USA_FROM_STATES") {
	$sql .= "\nand location=\"$location\"";
    }

    $sql .= "\ngroup by 1,2,3,4";

    $sql .= "\nUNION\n";

    $sql .= "select \"$location\",year(date_taken),month(date_taken), \"ALL AGES\",round(sum(population),0),sum(deaths_actual)\nfrom ${table}_month\nwhere year(date_taken) <= $eyear";

    $sql .= "\nand age_code in (" . display_ages_for_sql(@ALLDemo) . ")";
    
    if($location ne "USA_FROM_STATES") {
	$sql .= "\nand location=\"$location\"";
    }

    $sql .= "\ngroup by 1,2,3,4";

    $sql .= "\nUNION\n";

    $sql .= "select \"$location\",year(date_taken),13, age_code, round(sum(population),0),sum(deaths_actual)\nfrom ${table}_annual\nwhere year(date_taken) <= $eyear";

    if($location ne "USA_FROM_STATES") {
	$sql .= "\nand location=\"$location\"";
    }

    $sql .= "\ngroup by 1,2,3,4";

    $sql .= "\nUNION\n";

    $sql .= "select \"$location\",year(date_taken),13, \"ALL AGES\",round(sum(population),0),sum(deaths_actual)\nfrom ${table}_annual\nwhere year(date_taken) <= $eyear";

    $sql .= "\nand age_code in (" . display_ages_for_sql(@ALLDemo) . ")";
    
    if($location ne "USA_FROM_STATES") {
	$sql .= "\nand location=\"$location\"";
    }

    $sql .= "\ngroup by 1,2,3,4 order by 1,2,3,4;";


    print "***Reading TABLE: $table.\n" if($debug > 0);

    my $stmt = $dbh->prepare($sql);

    $stmt->execute();

    #
    #
    #
    
    while(my @row = $stmt->fetchrow_array()) {
	my ($location,$year,$month,$age_code,$population,$deaths) = @row;
	my $months_ago = get_months_ago($year,$month);

	if(defined($population)) {
	    $population{$table}{$year}{$month}{$age_code} = $population;
	}
	       
	if(defined($deaths)) {

	    #
	    # Adjust for lag
	    #
	    if($month == 13) {
		#
		# Adjust annual totals upwards if needed
		#

		# Months ago is since the beginning of $year
		my $have_months;

		$have_months = $months_ago + 1;
		my $short_magic = 0.5;
		$have_months = $have_months - ($shortstop * $short_magic);
		
		if(($have_months > 0) && ($have_months <= 12)) {
		    $deaths = int($deaths * (12/$have_months));
		}
	    } else {
		#
		# Adjust month totals upwards if needed
		#
	    }

	    #
	    #
	    #
    
	    $deaths{$table}{$year}{$month}{$age_code}{$signal_proc_raw} = $deaths;
	}
    }

    $stmt->finish();
}

#
# Now compute the trends
#
my %trend_P100K;

#############################################################################
#
#  Noise reduction section


#
#   Moving Average Smoothing algorithm
#

sub moving_average {
    my ($array_ref, $window_size) = @_;
    my @smoothed_values;
    my $half_window = int($window_size / 2);

    for my $i (0 .. $#{$array_ref}) {
	    my $sum = 0;
	    my $count = 0;

	    for my $j (-$half_window .. $half_window) {
		my $index = $i + $j;

		if ($index >= 0 && $index <= $#{$array_ref}) {
		    if(defined($array_ref->[$index])) {
			$sum += $array_ref->[$index];
		    } else {
			die "Undefined deaths in moving average";     # Theoretically because we assign one death in every suppressed case (in read_data), this should never happen.  We shall see
		    }
		    $count++;
		}
	    }

	    my $average = $count ? $sum / $count : 0;
	    push @smoothed_values, $average;
    }

    return \@smoothed_values;
}

#
# LOESS smoothing/noise reduction function
#

# LOESS function

# LOESS function

use strict;
use warnings;

sub loess {
    my ($xref, $yref, $span, $degree) = @_;

    my @x = @$xref;
    my @y = @$yref;
    my $n = scalar @x;

    my @fitted_values;

    for (my $i = 0; $i < $n; $i++) {
        my $x0 = $x[$i];
        my $sum_weights = 0;
        my $weighted_sum_x = 0;
        my $weighted_sum_y = 0;

        for (my $j = 0; $j < $n; $j++) {
            my $xj = $x[$j];
            my $distance = abs($xj - $x0);
            my $weight = tricube_weight($distance / $span);
            $sum_weights += $weight;
            $weighted_sum_x += $weight * $xj;
            $weighted_sum_y += $weight * $y[$j];
        }

        my $fitted_value = $weighted_sum_y / $sum_weights;
        push @fitted_values, $fitted_value;
    }

    return \@fitted_values;
}

sub tricube_weight {
    my $x = shift;
    return ($x >= 0 && $x <= 1) ? (1 - $x ** 3) ** 3 : 0;
}

#
#
###################################################################

#
#
#
#

sub compute_trendline_Statistics {
    my ($years,$deaths,$table,$month,$demo) = @_;
    my $sdeaths;
    
    if($smooth_baseline && (signal_process($table,$bs,1,$demo) eq $signal_proc_raw)) {
	# We only signal process the trendline if the entire demo isn't already being signal processed

	#
	# First, smooth the data if necessary
	#

	print "CALLING MOVING_AVERAGE FROM COMPUTE TRENDLINE STATISTICS FOR TABLE $table Years $years, Month $month Demo $demo\n" if($debug > 1);
	if($signal_proc_cooked eq "ma") {
	    print "compute_trendline_statistics: CALLING MOVING_AVERAGE on Table $table\n" if($debug > 1);
	    $sdeaths = moving_average($deaths,$smooth_window_trends);
	} elsif ($signal_proc_cooked eq "loess") {
	    print "compute_trendline_statistics: CALLING LOESS on Table $table\n" if($debug > 1);
		
	    $sdeaths = loess($years,$deaths,$loess_span_trends, $loess_degree_trends);
	} else {
	    die "Unknown signal processing algorithm $signal_proc_cooked";
	}
	
    } else {
	$sdeaths = $deaths;
    }

    #
    # Now extract only the baseline years and deaths from the death vectors
    #
    #
    # Repair the baseline by filling in any missing years
    #
    
    my @byears;
    my @bdeaths;
    my $lastDeaths;

    my $loopYear = @{$years}[0];
    for (my $i = 0; $i <= $#{$years} ; $i++) {
	my $nowYear = @{$years}[$i];
	if(defined($nowYear)) {
	    if(($nowYear >= 0) && ($nowYear <= $nbyears)) {
		my $nowDeaths = ${$sdeaths}[$i];
		if($nowYear > $loopYear) {  # There were years missing!
		    if(defined($lastDeaths)) {
			my $lostYears = $nowYear - $loopYear;
			while($loopYear < $nowYear) {
			    push(@byears,$loopYear);
			    push(@bdeaths, ($nowDeaths + $lastDeaths) / (($lostYears)+1));
			    $loopYear++;
			}
		    }
		}
		push(@byears,$nowYear);
		push(@bdeaths,$nowDeaths);
		$lastDeaths = $nowDeaths;
	    }
	    $loopYear = $nowYear + 1;
	} else {
	    die "NOWYEAR NOT DEFINED";
	}
    }

    #
    #
    #
    
    my $trendPoints = scalar(@{$years});

    die "byears ne bdeaths" if(scalar(@byears) != scalar(@bdeaths));
    die "trendpoints ne bdeaths" if($trendPoints != scalar(@bdeaths));
    die "trendpoints ne sdeaths" if($trendPoints != scalar(@{$sdeaths}));

    if($trendPoints < $minTrendPoints) {
	return(undef,undef,undef,$trendPoints);
    }

    #
    #
    #
    
    if($debug > 0) {
	if(($table eq "deaths_disease_ucd") && ($demo eq "YAH") && ($month==4)) {
	    my $bmax = $#byears;
	    my $omax = $#{$deaths};
	    my $smax = $#{$sdeaths};
	    printf "INPUT TO LineFit.  Omax $omax  Smax $smax  Bmax $bmax\n";
	    for (my $i = 0; $i <= $omax;$i++) {
		my $odeaths = @{$deaths}[$i];
		my $oyear = @{$years}[$i];
		my $sdeaths = @{$sdeaths}[$i];
		my $bdeaths = $bdeaths[$i];
		my $byear = $byears[$i];
		if(not defined($byear)) {
		    $byear = "";
		}
		if(not defined($bdeaths)) {
		    $bdeaths = "";
		}

		print "------ $table YAH Apr Year $oyear ODeaths $odeaths SDeaths $sdeaths BYear $byear BDeaths $bdeaths\n";
	    }
	}
    }

	

    #
    #
    #

    # Do the Linefit

    my $line_fit = Statistics::LineFit->new();
    
    $line_fit->setData($years, $sdeaths) or die "Invalid data points";


    #
    #
    #

    # Perform linear regression
    my ($intercept, $slope) = $line_fit->coefficients();

    # Calculate R-squared (R2)
    my $r_squared = $line_fit->rSquared();

#    if(($table =~ /deaths_disease_ucd/) && ($demo =~ /0-17/) && ($month == 7)) {
#	print "$table: Linear Regression: Y = $slope * X + $intercept\n";
#    }
#    print "R-squared (R2): $r_squared\n";
    
    return($slope,$intercept,$r_squared,$trendPoints);
}

#
# Logs of troubles creating trends
#
my %trend_trouble;
my %multiplier_trouble;
my %multi_sub_trouble;
#

sub print_trend_troubles {
    foreach my $trouble_table (keys(%trend_trouble)) {
	print "ALERT: $location could not compute trends for table $trouble_table for demos: ";
	my $numentries = scalar(@{$trend_trouble{$trouble_table}});
	if($numentries > 10) {
	    print " $numentries entries";
	} else {
	    foreach my $trouble_demo (uniq(@{$trend_trouble{$trouble_table}})) {
		print "$trouble_demo ";
	    }
	}
	print("\n");
    }
    
    foreach my $trouble_table (keys(%multiplier_trouble)) {
	print "WARNING: $location could not provide multipliers for for table $trouble_table: ";
	my $numentries = scalar(@{$multiplier_trouble{$trouble_table}});
	if($numentries > 10) {
	    print " $numentries entries";
	} else {
	    foreach my $trouble_text (uniq(@{$multiplier_trouble{$trouble_table}})) {
		print "$trouble_text ";
	    }
	}
	print("\n");
    }
    
    foreach my $trouble_table (keys(%multi_sub_trouble)) {
	print "WARNING: $location annual trend substitition for $trouble_table: ";
	my $numentries = scalar(@{$multi_sub_trouble{$trouble_table}});
	if($numentries > 10) {
	    print " $numentries entries";
	} else {
	    foreach my $trouble_text (uniq(@{$multi_sub_trouble{$trouble_table}})) {
		print "$trouble_text ";
	    }
	}
	print("\n");
    }
}


sub compute_trends {
    my ($table) = @_;

    #
    # 
    # For every demographic
    #    For every month
    #        For every year
    #             1.  Calculate the percent difference between the per-capita deaths at that point compared to the per-capita deaths in $byear
    #             2.  Add to the total difference
    #        End
    #        Divide the total difference by the number of years in the baseline (give average)
    #        Store the difference as the slope of the trend line with x & y intercept at $byear
    #    End
    #  End

    #    print "Doing $table....\n";
    
    foreach my $demo (demoNames()) {
	for (my $month = 1;$month <= 13;$month++) {
	    my @deathVecP100K;
	    my @yearVec;

#	    for(my $year = minyear($table); $year <= $be;$year++) {     #  We let the moving average smoother "sniff" at the years prior to the baseline but not see any years of the pandemic
	    for(my $year = $bs; $year <= $be;$year++) {
		my $sp;
		    
		if($smooth_baseline == 1) {
		    $sp = $signal_proc_cooked;
		} else {
		    $sp = $signal_proc_raw;
		}
		
		my $demoDeaths = deaths_in_demo($table,$year,$month,$demo,"P100K",$sp);
		if(defined($demoDeaths)) {
		    push(@deathVecP100K,$demoDeaths);
		    push(@yearVec,$year-$bs);   # $bs is year zero
		} else {
		}
	    }

	    my ($m,$b,$r_squared);
	    my $trendpoints;
	    
	    ($m,$b,$r_squared,$trendpoints) = compute_trendline_Statistics(\@yearVec,\@deathVecP100K,$table,$month,$demo);

	    if($trendpoints >= $minTrendPoints) {
		if($trendpoints <= $minTrendPointsWARN) {
		    print "WARNING: Only $trendpoints trendpoints for $table Demo $demo Month $monthnames[$month-1] baseline in $location\n" if($debug > 0);
		}

		    
#		print "My m is $m, theirs is $ms, my b is $b, theirs is $bs.  R_Squared is $r_squared for theirs\n";
		
		$trend_P100K{$table}{$month}{$demo} = [$m,$b,$r_squared];

		if($r_squared < 0.5) {
		    if(! ($table =~ /_999_|_r99_/)) {
    			print "$location: Low quality trend warning.  Table $table month $monthnames[$month-1] demo $demo.  R2 is $r_squared\n" if($debug > 1);
		    }
		}
	    } else {
		    push(@{$trend_trouble{$table}},$demo);
	    }
	}
    }
    
}

#
# How confident are we?
#

sub confidence {
    my ($rsquared) = @_;
    my $mmult = 1.0;
    my $bmult = 1.0;
    my @bmults = (1.0,1.0,1.0,1.0);
#    my @mmults = (1.0,0.95,0.9,0.85);    #  Smaller the number, the less of a slope
    #    my @mmults = (1.0,0.98,0.96,0.94);    #  Smaller the number, the less of a slope
    my @mmults = (1.0,1.0,1.0,1.0);
    
    $mmult = $mmults[3];
    $bmult = $bmults[3];
    
    if($rsquared > 0.70) {
	$mmult = $mmults[0];
	$bmult = $bmults[0];
    }
    
    if($rsquared > 0.50) {
	$mmult = $mmults[1];
	$bmult = $bmults[1];
    }
    if($rsquared > 0.30) {
	$mmult = $mmults[2];
	$bmult = $bmults[2];
    }

    return($mmult,$bmult);
}

#
# Linear trendline for starters
#

sub calc_trendmultiplier_demo {
    my ($table,$year,$month,$demo,$mode) = @_;
    my $deaths;  # Can be either P100K or RAW

    if(table_is_canttrend_table($table)) {
	return(deaths_in_demo($table,$year,$month,$demo,$mode));
    }

    if(not defined $trend_P100K{$table}{$month}{$demo}) {
	print "Warning: $location table $table no trend multiplier for year $year month $monthnames[$month-1] for demo $demo.  Will try and substitute annual\n" if(0==0);
	push(@{$multi_sub_trouble{$table}},"$demo $year/$monthnames[$month-1]");
	
	if(defined ($trend_P100K{$table}{13}{$demo})) {
	    my $madj = 12;
	    my $badj = 12;
		
	    my ($m,$b,$r_squared) = @{$trend_P100K{$table}{13}{$demo}};

	    $trend_P100K{$table}{$month}{$demo} = [$m / $madj,$b / $badj,$r_squared];
	}
    }
	
    if(defined $trend_P100K{$table}{$month}{$demo}) {
	my ($m, $b, $r_squared) = @{$trend_P100K{$table}{$month}{$demo}};

	die "table $table year $year month $month demo $demo trend values not defined" if(not (defined($m)));

	if($adj_confidence) {
	    my ($ms,$bs) = confidence($r_squared);
	    $m = $m * $ms;
	    $b = $b * $bs;
	}

	$deaths = (($m) * ($year-$bs)) + ($b);
	if($mode eq "P100K") {
	    # Nothing
	} elsif($mode eq "RAW") {
	    my $pop = pop_in_demo($table,$year,$month,$demo);
	    if(defined($pop)) {
		$deaths = $deaths * ($pop / 100000);
	    } else {
		$deaths = undef;
	    }
	} else {
	    die "Unknown mode in calc_trendmultiplier";
	}

    } else {
	print "Warning: $location table $table could not provide trend multiplier for $location year $year month $monthnames[$month-1] for demo $demo\n" if($debug > 0);
	push(@{$multiplier_trouble{$table}},"$demo $year/$month NM");
    }

    if(defined($deaths) && ($deaths < 0)) {
	$deaths = 0;
    }
    return($deaths);
}

sub r2value_demo {
    my ($table,$month,$demo) = @_;

    if(not defined($trend_P100K{$table}{$month}{$demo})) {
	return(0);
    }

    my ($d1,$d2,$r_squared) = @{$trend_P100K{$table}{$month}{$demo}};

    return($r_squared);
}
#
# Output the trend
#

sub deaths_in_demo {
    my ($table,$year,$month,$demo,$mode,$signalproc) = @_;

    my $deaths;
    my $population;

    die "NO DEMO IN deaths_in_demo" if(not defined($demo));
    print "Table $table year $year month $month deaths in demo $demo " if($debug > 1);

    if(not defined($signalproc)) {
	$signalproc = signal_process($table,$year,$month,$demo);
    } else {
#	print "\tDEATHS IN DEMO SIGNAL PROC DEFINED AS $signalproc\n";
    }

    
    foreach my $display_code (demoSet($demo)) {
	my $d = $deaths{$table}{$year}{$month}{$code_map{$display_code}}{$signalproc};
	my $p = $population{$table}{$year}{$month}{$code_map{$display_code}};
	if(defined($d)) {
	    $deaths += $d;
	}
	if(defined($p)) {
	    $population += $p;
	}
    }

    if(not defined($deaths)) {
	print "WARNING deaths_in_demo:Deaths not defined for demo $demo in $table/$year/$month\n" if($debug > 1);
	print "UNDEF\n" if($debug > 1);
        return(undef);
    }
    
    if($mode eq "RAW") {
	print "$deaths\n" if($debug > 1);
	return($deaths);
    } elsif($mode eq "P100K") {
	if(defined($population)) {
	    print "100K $deaths\n" if($debug > 1);
	    return($deaths / ($population / 100000));
	} else {
	    print "WARNING deaths_in_demo:Population not defined for demo $demo in $table/$year/$month" if($debug > 1);
	    print "UNDEF POP\n" if($debug > 1);
	    return(undef);
	}
    }

    die "Unknown mode $mode in deaths_in_demo";
}

sub pop_in_demo {
    my ($table,$year,$month,$demo) = @_;
    my $pop_table = "deaths_all";

    my $population;
    die "NO DEMO in pop_in_demo" if(not defined($demo));
    foreach my $display_code (demoSet($demo)) {
	my $p = $population{$table}{$year}{$month}{$code_map{$display_code}};
	
	if(not defined($p)) {
	    $p = $population{$pop_table}{$year}{$month}{$code_map{$display_code}};
	}
	
	if(defined($p)) {
	    $population += $p;
	}
	
    }

    print "Table $table year $year month $month population in demo $demo $population" if($debug > 1);
    return($population);
}


sub output_trends {
    my ($table,$filename) = @_;

    open(FH, '>', $filename) or die "$filename : $!";
    print FH "Location,$table,Year";

    for (my $month = 1;$month <= 13;$month++) {
	foreach my $demo (demoNames()) {
	    my $r_squared = sprintf("\"R2 %3.0f%%\"",r2value_demo($table,$month,$demo) * 100);
	    print FH ",$monthnames[$month-1] $demo RAW,$monthnames[$month-1] $demo SMOOTH, $monthnames[$month-1] $demo TREND,$r_squared";
	}
    }

    print FH "\n";

    for(my $year = $bs; $year <= $be;$year++) {
	print FH "$location,$table,";
	my $date = sprintf("%04d-01-01",$year);
	print FH "$date";
	
	for (my $month = 1;$month <= 13;$month++) {
	    foreach my $demo (demoNames()) {
		my $bpop = pop_in_demo($table,$bs,$month,$demo) /   100000;
		my $npop = pop_in_demo($table,$year,$month,$demo) / 100000;

		my $byear_r = deaths_in_demo($table,$bs,$month,$demo,"P100K",$signal_proc_raw);
		my $nyear_r = deaths_in_demo($table,$year,$month,$demo,"P100K",$signal_proc_raw);
		
		my $byear_s = deaths_in_demo($table,$bs,$month,$demo,"P100K",$signal_proc_cooked);
		my $nyear_s = deaths_in_demo($table,$year,$month,$demo,"P100K",$signal_proc_cooked);

		my $byear_t = calc_trendmultiplier_demo($table,$bs,$month,$demo,"P100K");
		my $nyear_t = calc_trendmultiplier_demo($table,$year,$month,$demo,"P100K");

		if($table =~ /deaths_all_|deaths_disease_|deaths_nondisease_/) {
		    if((not defined($byear_r)) || ($byear_r == 0)) {
			print "INFO: no raw base year ($bs-$month) for table $table demo $demo\n" if($debug > 0);
		    } else {
			if((not defined($byear_s)) || ($byear_s == 0)) {
			    print "WARNING: no smoothed base year for table $table demo $demo\n";
			}
		    }
		    if((not defined($byear_t)) || ($byear_t == 0)) {
			print "ALERT: no trended base year for table $table demo $demo\n";
		    }
		}

		if(defined($byear_r) && defined($nyear_r)) {
		    my $rPct;
		    if(($byear_r == 0)||($nyear_r == 0)) {
			$rPct = 0;
		    } else {
			$rPct = ($nyear_r / $npop) / ($byear_r / $bpop);
			$rPct -= 1;
		    }
		    print FH ",$rPct";
		} else {
		    print FH ",NaN";   # Can't be blank or Gnuplot freaks out
		}
		if(defined $byear_s && defined($nyear_s)) {
		    my $sPct;
		    if(($byear_s == 0)||($nyear_s == 0)) {
			$sPct = 0;
		    } else {
			$sPct = ($nyear_s / $npop) / ($byear_s / $bpop);
			$sPct -= 1;
		    }
		    print FH ",$sPct";
		} else {
		    print FH ",NaN";   # Can't be blank or Gnuplot freaks out
		}
		if(defined $byear_t && defined($nyear_t)) {
		    my $tPct;
		    if(($byear_t == 0)||($nyear_t == 0)) {
			$tPct = 0;
		    } else {
			$tPct = ($nyear_t / $npop) / ($byear_t / $bpop);
			$tPct -= 1;
		    }
		    print FH ",$tPct";
		} else {
		    print FH ",NaN";   # Can't be blank or Gnuplot freaks out
		}
		my $r_squared = sprintf("%3.0f",r2value_demo($table,$month,$demo) * 100);
		print FH ",$r_squared";
	    }
	}
	
	print FH "\n";
    }

    print FH "\n";

    close(FH);
}

#
# Finally, output the data
#

sub output_data {
    my ($table,$filename,$incraw) = @_;

    open(FH, '>', $filename) or die $!;

    print FH "Location,Table,Year";

    foreach my $monthname (@monthnames) {
        foreach my $demo (demoNames()) {
	    if($incraw) {
		print FH ",$monthname $demo total deaths,$monthname $demo population";
	    } else {
		print FH ",$monthname $demo Per 100K";
	    }
        }
    }

    print FH "\n";


    for (my $year = $syear;$year <= maxyear($table);$year++) {
	print FH "$location,$table,";
	#	my $date = sprintf("%04d-01-01",$year);
		my $date = sprintf("\\n%04d",$year);
	print FH "$date";

	if(get_months_ago($year,1) < $est_window) {
	    print FH "(*EST)";
	}

	for(my $month = 1; $month <= 13; $month += 1) {
	    foreach my $demo (demoNames()) {
		my $demoDeath;
		my $demoDeathP100k;
		my $demoPop;
		
		foreach my $display_code (demoSet($demo)) {
		    #		    my $death = $deaths{$table}{$year}{$month}{$code_map{$display_code}}{signal_process($table,$year,$month,$demo)};
		    my $death = $deaths{$table}{$year}{$month}{$code_map{$display_code}}{$signal_proc_raw};
		    my $pop = $population{$table}{$year}{$month}{$code_map{$display_code}};
		    if(defined($death) && defined($pop)) {
			$demoDeath += $death;
			$demoPop += $pop;
		    }
		}
		
		if(defined($demoDeath) && defined($demoPop)) {
		   $demoDeathP100k = $demoDeath / ($demoPop / 100000);
		   if($incraw) {
		       print FH ",$demoDeath,$demoPop";
		   } else {
		       print FH ",$demoDeathP100k";
		   }
		} else {
		    if($incraw) {
			print FH ",NaN,NaN";  #  Can't be blank or Gnuplot freaks out
		    } else {
			print FH ",NaN";  #  Can't be blank or Gnuplot freaks out
		    }
		}
	    }
	}
	print FH "\n";
    }

    close(FH);
}

sub get_shortstop {
    my ($table) = @_;

    my %tables =  (%real_tables,%canttrend_tables,%synthetic_tables);

    foreach my $tab (keys(%tables)) {
	my $nz = $tab;
	$nz =~ s/^\d\d//;
	if($table eq $nz) {
	    return($tables{$tab});
	}
    }

    die "Can't find shortstop for $table";
}

sub output_months1 {
    my ($table,$filename,$period,$demo) = @_;

    open(FH, '>', $filename) or die $!;

    print FH "Location,Table,Month,YAH Per 100K\n";

    my %monthtot;
    my %poptot;
    my $minyear;
    my $maxyear;

    my $shortstop;

    $shortstop = get_shortstop($table);

    my $cur_year = 1900 + (localtime)[5];
    
    for(my $month = 1; $month <= 13; $month += 1) {    
	foreach my $year (sort { $a <=> $b } keys %{$deaths{$table}}) {
	    if($period eq "BASELINE") {
		next if($year < $bs);
		next if($year > $be);
	    } else {
		next if($year <= $be);
		#		next if(get_months_ago($year,$month) < $shortstop);
		next if($year >= $cur_year);
	    }

	    my $d = deaths_in_demo($table,$year,$month,$demo,"RAW");
	    if(defined($d)) {
		$monthtot{$month} += $d;
	    }
	    my $p = pop_in_demo($table,$year,$month,$demo);

	    if(defined($p)) {
		$poptot{$month} += $p;
	    }
	}
    }
    
    for(my $month = 1; $month <= 13; $month += 1) {
	my $deaths = $monthtot{$month};
	next if(! $deaths);

	print FH "$location,$table";
	if($month == 13) {
	    print FH ",AVERAGE";
	} else {
	    print FH ",$monthnames[$month-1]";
	}

	
	if(defined $deaths) {
	    my $pop = $poptot{$month};
	    die "POP ZERO $pop deaths $deaths demo $demo table $table month $month" if((not defined($pop)) || ($pop == 0));
	    my $per100K = $deaths/($pop/100000);

	    if($month == 13) {
		$per100K = $per100K/12;
	    }
	    print FH ",$per100K";
	} else {
	    print FH ",NaN";
	}

	my $season = $monthseason[$month-1];
	print FH ",$season\n";
	
    }
    close(FH);
}

sub output_months2 {
    my ($table,$filename,$demo) = @_;

    open(FH, '>', $filename) or die $!;

    print FH "Date,Location,Table,Month,Expected $demo Per 100K,Actual $demo Per 100K,Season\n";

    my $shortstop;

    $shortstop = get_shortstop($table);
    
    for(my $year = $syear; $year <= maxyear($table); $year += 1) {
	print "****Months2: location $location table $table year $year\n" if($debug > 2);
	for(my $month = 1;$month <= 13;$month += 1) {
	    if(($month <= 12) && (get_months_ago($year, $month) < $shortstop)) {
		$month = 13;  # Cause it to barf out the annual stats and quit
	    }

	    my $mpop = pop_in_demo($table,$year,$month,$demo);
		
	    my $per100K = deaths_in_demo($table,$year,$month,$demo,"P100K");
	    my $rmdeaths = deaths_in_demo($table,$year,$month,$demo,"RAW",$signal_proc_raw);
	    my $mdeaths = deaths_in_demo($table,$year,$month,$demo,"RAW");
	    my $medeaths = calc_trendmultiplier_demo($table,$year,$month,$demo,"RAW");
	    my $eper100K = calc_trendmultiplier_demo($table,$year,$month,$demo,"P100K");

	    if(not defined($per100K)) {
		$per100K = "NaN";
	    }
	    if(not defined($eper100K)) {
		$eper100K = "NaN";
	    }
	    if(not defined($rmdeaths)) {
		$rmdeaths = "NaN";
	    }
	    if(not defined($mdeaths)) {
		$mdeaths = "NaN";
	    }
	    if(not defined($medeaths)) {
		$medeaths = "NaN";
	    }

	    print "OUTPUT: table $table demo $demo year $year month $month mpop $mpop rmdeaths $rmdeaths mdeaths $mdeaths medeaths $medeaths eper100K $eper100K, per100K $per100K\n" if($debug > 1);
	    
	    my $date;
	    if($month <= 12) {
		$date = sprintf("%04d-%02d-01",$year,$month);
		my $season = $monthseason[$month-1];
		my $nicemonth = nicemonth($year,$month);
		$date = sprintf("%04d-%02d-01",$year,$month);
		print FH "$date,$location,$table,$nicemonth,$eper100K,$per100K,$season\n";
	    } else {
		$date = sprintf("%04d-01-01",$year);
	    }

	    
	    insert_into_db($date, $location, $mpop, $demo, $table, $month,
			   ($per100K eq "NaN" ? undef : $per100K), ($eper100K eq "NaN" ? undef : $eper100K),
			   ($rmdeaths eq "NaN" ? undef : $rmdeaths), ($mdeaths eq "NaN" ? undef : $mdeaths), ($medeaths eq "NaN" ? undef : $medeaths),
			   0);
	}
	print "\n" if($debug > 2);
    }
    close(FH);
}

sub generate_difference {
    my ($dest,$t1,$t2) = @_;

    foreach my $year (sort { $a <=> $b } keys %{$population{$t1}}) {
	foreach my $month (sort { $a <=> $b } keys %{$population{$t1}{$year}}) {
	    foreach my $age_code (sort keys %{$population{$t1}{$year}{$month}}) {
		my $t1deaths = $deaths{$t1}{$year}{$month}{$age_code}{$signal_proc_raw};
		my $t2deaths = $deaths{$t2}{$year}{$month}{$age_code}{$signal_proc_raw};
		my $t1pop = $population{$t1}{$year}{$month}{$age_code};
		my $t2pop = $population{$t2}{$year}{$month}{$age_code};

		if((! ($age_code =~ /Not Stated/)) && ((! looks_like_number($t1pop)) || ($t1pop <= 0))) {
		    die "FATAL: Generate_Difference: Population $year/$month/$age_code not defined or zero ($t1pop) in $t1!\n";
		}

		if(defined($t2deaths)) {

		    if(!($age_code =~ /Not Stated/) && (not defined($t2pop))) {
			die "FATAL: Generate_Difference: Population $year/$month/$age_code not defined in $t2!\n";
		    }

		    if(! ($age_code =~ /Not Stated/)) {
			die "FATAL: Generate_Difference: Population $t1 $year/$month/$age_code does not match $t2 !\n" if($t1pop != $t2pop);
		    }

		    if(($age_code =~ /Not Stated/) || defined($t1pop)) {
			$population{$dest}{$year}{$month}{$age_code} = $t1pop;
			if(defined($t1deaths)) {
			    $deaths{$dest}{$year}{$month}{$age_code}{$signal_proc_raw} = $t1deaths - $t2deaths;
			}

		    }
		} else {  # Else nothing to do (nothing to subtract)
		    $population{$dest}{$year}{$month}{$age_code} = $t1pop;
		    $deaths{$dest}{$year}{$month}{$age_code}{$signal_proc_raw} = $t1deaths;
		} 
	    }
	}
    }
}
    

sub check_data {

    foreach my $table (keys %deaths) {
	foreach my $year (sort { $a <=> $b } keys %{$deaths{$table}}) {
	    my $tpop = 0;
	    my $tdeath = 0;
	    my $months = 0;
	    my $popAnnual = $population{$table}{$year}{13}{"ALL AGES"};

	    next if(not defined($popAnnual));  # Some years have no deaths from a given disease (ex COVID)

	    my $mPop;
	    
	    foreach my $month (sort { $a <=> $b } keys %{$deaths{$table}{$year}}) {
		print "---- Table $table Year: $year Month: $month\n" if($debug > 1);
		$mPop = 0;
		#		foreach my $age_code (sort keys %{$deaths{$table}{$year}{$month}}) {
		foreach my $age_code (sort keys %{$population{$table}{$year}{$month}}) {
		    my $deaths = $deaths{$table}{$year}{$month}{$age_code}{$signal_proc_raw};
		    my $pop = $population{$table}{$year}{$month}{$age_code};
		    my $aPopAnnual = $population{$table}{$year}{13}{$age_code};

		    if(defined $deaths) {
			if(!($age_code =~ /Not Stated/)) {
			    die "check_data:$table $year $age_code annual population does not look like a number ($aPopAnnual)" if(not looks_like_number($aPopAnnual));
			    die "check_data:$table $year $month $age_code  population is undefined for $age_code" if(not looks_like_number($pop));
			    die "check_data:$table $year $month $age_code month population $pop not equal to annual population $aPopAnnual" if($pop != $aPopAnnual);
			    die "check_data: Table $table Year $year Month $month Demo $age_code population not defined" if(! defined($pop));
			    
			    my $tp100K = $deaths/($pop/100000);
			}


			if(distinct_age($age_code) && $month != 13) {
			    $tdeath += $deaths;
			}
		    }
		    
		    if(distinct_age($age_code) && $month != 13) {
#			print "Age code $age_code is distinct\n";
			if(!($age_code =~ /Not Stated/)) {
			    die "check_data: Table $table Pop not defined for age code $age_code year $year month $month" if(not (defined $pop));
#			    print "Adding population of $age_code ($pop) to total population\n" if($table eq "deaths_all");
			    $tpop += $pop;
			    $mPop += $pop;
			}
		    }

		    if($debug > 1 && ($month != 13)) {
			print "\t++++Table $table Year $year Month $month Age Code $age_code Pop $pop Month Pop $mPop Population Annual $popAnnual Total pop $tpop\n";
		    }
		}
		$months += 1;
	    }

#	    print "\*** MPOP $mPop.    TPOP  $tpop\n";
	    if($months > 1) {
		$tpop /= ($months-1);
	    }
	    print "+++++++++++++++Table $table Year $year ($months months) Total pop $tpop  Popannual $popAnnual  Total death $tdeath\n" if($debug > 1);
	    die "***** Check Data: Table $table Year $year ($months months) PopAnnual not set\n" if(not defined($popAnnual));
	    die "***** Check Data: Table $table Year $year ($months months) PopAnnual is $popAnnual\n" if(not looks_like_number($popAnnual));
	    if(($tpop != $popAnnual) && ($months == 13)) {
		print "WARNING Check Data: Table $table Year $year $months months) Total Pop $tpop Not equal to PopAnnual $popAnnual\n";
	    }
	}
	print "Table $table checks in check_data\n" if($debug > 0);
    }
}

sub maxyear {
    my ($table) = @_;

    my @years = sort { $b <=> $a }(keys %{$deaths{$table}});

    die "Table $table no years" if($#years < 0);

    my $maxyear = $years[0];

    my (@tm) = localtime(time());

    if(! ($table =~ /deaths_all$|deaths_999_ucd|deaths_r99_ucd/)) {
	my ($maxyear_d,$maxmonth_d,$maxday_d) = Add_Delta_YM($tm[5]+1900,$tm[4],$tm[3], 0, -3);  # Three months ago

	if($maxyear_d < $maxyear) {
	    $maxyear = $maxyear_d;
	}
    }

    print "Returning Maximum Year $maxyear for $table\n" if($debug > 2);
    return($maxyear);
}

sub minyear {
    my ($table) = @_;

    my @years = sort { $a <=> $b }(keys %{$deaths{$table}});

    die "Table $table no years" if($#years < 0);

    my $minyear = $years[0];
    if($table eq "deaths_999_ucd") {
	print "Returning Minimum Year $minyear for $table\n" if($debug > 2);
    }

    return($minyear);
}

sub maxmonth {
    my ($table,$year) = @_;
    
    my @months = sort { $b <=> $a } (keys %{$deaths{$table}{$year}});

    die "Table $table no years" if($#months < 1);   # Month 13 (the year) will always be there so this array should always have at least two entries

    my $maxmonth = $months[1];

    die "table $table year $year.  months[0] !!= 13 or maxmonth ($maxmonth) > 12" if(($months[0] != 13)||($maxmonth > 12));

    print "Returning Month $maxmonth for $table Year $year\n" if($debug > 2);

    return($maxmonth);
}

sub noise_reduce {

    my $numTables = 0;
    
    foreach my $table (keys %deaths) {
	my %acDeaths;
	my %acMonths;

	foreach my $age_code (@age_codes) {
	    my $monthCnt = 0;
	    for (my $year = minyear($table); $year <= maxyear($table);$year++) {
		for (my $month = 1;$month <= maxmonth($table,$year);$month++) {
		    my $death = $deaths{$table}{$year}{$month}{$age_code}{$signal_proc_raw};
		    die "DEATH IS UNDEFINED FOR TABLE $table Year $year Month $month Age_Code $age_code  Signal Proc: $signal_proc_raw\n" if(not defined($death));
		    push(@{$acMonths{$age_code}},$monthCnt);
		    push(@{$acDeaths{$age_code}},$death);
		    $monthCnt++;
		}
	    }
	}


	foreach my $age_code (@age_codes) {
	    my $monthVec = $acMonths{$age_code};
	    my $deathVec = $acDeaths{$age_code};
	    my $deathVecCnt = scalar(@{$deathVec});

	    my $newDeathsS;

	    if($signal_proc_cooked eq "ma") {
		print "CALLING MOVING_AVERAGE FROM noise_reduce on Table $table Age Code $age_code\n" if($debug > 1);
		$newDeathsS = moving_average($deathVec,$smooth_window_deaths);
	    } elsif ($signal_proc_cooked eq "loess") {
		print "CALLING LOESS FROM noise_reduce on Table $table Age Code $age_code\n" if($debug > 1);
		
		$newDeathsS = loess($monthVec,$deathVec,$loess_span, $loess_degree);
#	    } elsif ($signal_proc_cooked eq "pdl_loess") {
#		print "CALLING PDL_LOESS FROM noise_reduce on Table $table Age Code $age_code\n" if($debug > 1);
#		$newDeathsS = pdl_loess($monthVec,$deathVec,$loess_pdl_span, $loess_pdl_degree);
	    } else {
		die "Unknown signal processing algorithm $signal_proc_cooked";
	    }
	    
	    for (my $year = minyear($table); $year <= maxyear($table);$year++) {
		for (my $month = 1;$month <= maxmonth($table,$year);$month++) {
		    my $death = shift(@{$newDeathsS});
		    $deaths{$table}{$year}{$month}{$age_code}{$signal_proc_cooked} = $death;
		}
		$deaths{$table}{$year}{13}{$age_code}{$signal_proc_cooked} = $deaths{$table}{$year}{13}{$age_code}{$signal_proc_raw};
	    }
	}
#	print "COOKED Table $table $numYears/$numMonths/$numAc\n";
    }

    return;
}

sub fill_suppressed {

    my ($table) = @_;

    print "fill_suppressed: TABLE $table\n" if($debug > 0);
    for (my $year=minyear($table);$year <= maxyear($table);$year++) {
	foreach my $age_code (@age_codes) {
	    # first see if the year hasn't been suppressed
	    if(not defined($deaths{$table}{$year}{13}{$age_code}{$signal_proc_raw})) {
		print "Table $table Year $year Age code $age_code NO ANNUAL FOR ADJUSTMENT\n" if($debug > 0);;
		$deaths{$table}{$year}{13}{$age_code}{$signal_proc_raw} = int(max(1,rand(9)));
	    }
	
	    for (my $month=1;$month <= maxmonth($table,$year);$month++) {
		if(not defined($deaths{$table}{$year}{$month}{$age_code}{$signal_proc_raw})) {    # Death was "suppressed" (means it was between 1 and 9)
		    #		    print "Table $table deaths suppressed for $year/$month/$age_code\n";
		    my $fillin_deaths = $deaths{$table}{$year}{13}{$age_code}{$signal_proc_raw};   # Annual number of deaths

		    die "NO ANNUAL (???) in fill_suppressed" if(not defined($fillin_deaths));
			
		    if($fillin_deaths > 300) {
			print("WARNING: Table $table Year $year Month $month Age Code $age_code suspcious annual deaths ($fillin_deaths)\n") if(! ($table =~ "deaths_r99_ucd|deaths_999_ucd|deaths_influenza_mcd"));
		    }
		    
		    $fillin_deaths = int($fillin_deaths / 12);  # make it per-month.  This should probably also try and adjust for seasonality (UGH)
		    
		    my $fillin = int(rand($fillin_deaths));

		    $fillin = min($fillin,9);
		    if($fillin == 0) {
			$fillin = 1;
		    }
		    
		    print("Adjusting $table $year/$month/$age_code supressed deaths to $fillin\n") if($debug > 1);
		    $deaths{$table}{$year}{$month}{$age_code}{$signal_proc_raw} = $fillin;
		}
	    }
	}
    }
}

my %tk = (%real_tables,%canttrend_tables);

foreach my $table (tablenames(%tk)) {
    read_data($table,get_shortstop($table));
    fill_suppressed($table);
}


#
# Allocate 999 and R99 codes as necessary
print "ALLOCATE 99s" if($debug > 0);
#
if($allocate_99s) {
    allocate_99s("deaths_disease_ucd","DISEASE");
    allocate_99s("deaths_nondisease_ucd","NONDISEASE");
}


#
# Generate synthetic tables
#

print "GENERATE DIFFERENCE" if($debug > 0);
generate_difference("deaths_nondisease_mdrugalcohol","deaths_nondisease_ucd","deaths_drugalcoholod_ucd");
generate_difference("deaths_all_mdrugalcohol","deaths_all","deaths_drugalcoholod_ucd");
generate_difference("deaths_disease_mcovid","deaths_disease_mcd","deaths_covid_mcd");

#
#
# Basic data sanity checking
#
#

print "CHECK DATA" if($debug > 0);
check_data();

#
# Do noise reduction
#

print "NOISE REDUCE DATA\n" if($debug > 0);
noise_reduce();

my @tables = tablenames(%real_tables,%canttrend_tables,%synthetic_tables);

foreach my $table (@tables) {

    if(!table_is_canttrend_table($table)) {
	print "$table COMPUTE TRENDS\n" if($debug > 0);
	compute_trends($table);
	
	print "$table OUTPUT TRENDS\n" if($debug > 0);
	output_trends($table,"Outputs/CSV/$table/$ofile-trends.csv");
    
    } else {
	print "INFO: Table $table is a cant-trend table.\n" if($debug > 0);
    }


    print "$table OUTPUT DATA\n" if($debug > 0);
    output_data($table,"Outputs/CSV/$table/$ofile-data.csv",0);
    output_data($table,"Outputs/CSV/$table/$ofile-data-raw.csv",1);
#
#
#
    #

    
    foreach my $demo (demoNames()) {
	print "$table DEMO $demo output\n" if($debug > 0);
	output_months1($table,"Outputs/CSV/$table/$ofile-$demo-BASELINE-months1.csv","BASELINE",$demo);
	output_months1($table,"Outputs/CSV/$table/$ofile-$demo-PANDEMIC-months1.csv","PANDEMIC",$demo);
	output_months2($table,"Outputs/CSV/$table/$ofile-$demo-months2.csv",$demo);
    }
}

print_trend_troubles();