% https://www.irs.gov/uac/SOI-Tax-Stats---Individual-Statistical-Tables-by-Size-of-Adjusted-Gross-Income

%close all
clear all

rng('default');
rng(1);

x = [        random('Discrete Uniform',    5000, 10608,1); ...
        5000+random('Discrete Uniform',    5000, 12030,1); ...
       10000+random('Discrete Uniform',    5000, 12503,1); ...
       15000+random('Discrete Uniform',    5000, 11621,1); ...
       20000+random('Discrete Uniform',    5000, 10125,1); ...
       25000+random('Discrete Uniform',    5000,  8809,1); ...
       30000+random('Discrete Uniform',   10000, 14473,1); ...
       40000+random('Discrete Uniform',   10000, 11279,1); ...
       50000+random('Discrete Uniform',   25000, 19229,1); ...
       75000+random('Discrete Uniform',   25000, 12574,1); ...
      100000+random('Discrete Uniform',  100000, 16425,1); ...
      200000+random('Discrete Uniform',  300000, 4488,1); ...
      500000+random('Discrete Uniform',  500000,  724,1); ...
     1000000+random('Discrete Uniform',  500000,  156,1); ...
     1500000+random('Discrete Uniform',  500000,  64,1); ...
     2000000+random('Discrete Uniform', 3000000,  91,1); ...
     5000000+random('Discrete Uniform',10000000,  21,1); ...
    10000000+random('Discrete Uniform',20000000,  12,1) ...
     ];

x = x(1:10:end);

n = length(x);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

figure(1);
histogram(x,'BinWidth',5000,'BinLimits',[0 200000],'EdgeColor','auto');
%text(4000,400, sprintf('mean   = %8.2f \n', mean(x100mean)),'FontSize',18);
%text(4000,350, sprintf('std     = %8.2f \n', std(x100mean)),'FontSize',18);
title('Adjusted Gross Income (2013)');
xlabel('AGI (dollars)');
set(gca,'FontSize',18);
drawnow;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

figure(2);
histogram(x,'BinWidth',10000,'EdgeColor','auto');
%text(4000,400, sprintf('mean   = %8.2f \n', mean(x100mean)),'FontSize',18);
%text(4000,350, sprintf('std     = %8.2f \n', std(x100mean)),'FontSize',18);
title('Adjusted Gross Income (2013)');
xlabel('AGI (dollars)');
set(gca,'FontSize',18);
drawnow;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

figure(3);
histogram(log10(x),'EdgeColor','auto');
%text(4000,400, sprintf('mean   = %8.2f \n', mean(x100mean)),'FontSize',18);
%text(4000,350, sprintf('std     = %8.2f \n', std(x100mean)),'FontSize',18);
title('Log of Adjusted Gross Income (2013)');
xlabel('log of AGI (dollars)');
set(gca,'FontSize',18);
drawnow;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The is bootstrap

for j=1:10000
    indices = random('Discrete Uniform',n,n,1);
    xresampled = x(indices);
    xmedian(j) = median(xresampled);
    xmean(j) = mean(xresampled);
end

figure(4);
histogram(xmedian);
text(35000, 700, sprintf('mean   = %6.0f \n', mean(xmedian)),'FontSize',18);
text(35000, 600, sprintf('std     = %6.0f \n', std(xmedian)),'FontSize',18);
text(35000, 500, sprintf(' 2.5 pct = %0.0f \n', prctile(xmedian,2.5)),'FontSize',18);
text(35000, 400, sprintf('97.5 pct = %0.0f \n', prctile(xmedian,97.5)),'FontSize',18);
title('Medians of Bootstrapped Samples');
xlabel('AGI (dollars)');
set(gca,'FontSize',18);
drawnow;

figure(5);
histogram(xmean);
text(65000, 550, sprintf('mean   = %6.0f \n', mean(xmean)),'FontSize',18);
text(65000, 500, sprintf('std     = %6.0f \n', std(xmean)),'FontSize',18);
text(65000, 450, sprintf(' 2.5 pct = %0.0f \n', prctile(xmean,2.5)),'FontSize',18);
text(65000, 400, sprintf('97.5 pct = %0.0f \n', prctile(xmean,97.5)),'FontSize',18);
title('Means of Bootstrapped Samples');
xlabel('AGI (dollars)');
set(gca,'FontSize',18);
drawnow;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% TAKE LOGS AND DO CONFIDENCE INTERVAL FOR MEAN OF LOGS

logx = log(x);
avg = mean(logx) 
dev = std(logx)
[exp(avg - 2*dev/sqrt(n)), exp(avg), exp(avg + 2*dev/sqrt(n))]

xsort = sort(x);
k = round(n/2 - sqrt(n))
[xsort(k), median(x), xsort(n-k)]
