This notebook conducts a statistical analysis of hypotheses derived in the VAST’15 publication "3D Regression Heat Map Analysis of Population Study Data".

Sources are:

Setup

Load the data and only keep subjects with extracted centerlines. This is necessary so that the population is consistent with the one used in the paper.

library(gplots)
library(jsonlite)
library(dplyr)
library(vcd)
library(gridExtra)
# Read the label table that every analysis below works on.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# re-running the notebook on another machine.
ship <- read.csv(
  file = "/Users/paul/Tresors/regresson-heatmap/data/breast_fat_imageonly_labels_parenchym_mean.csv"
)

ANOVA of Parenchyma Tissue and Kidney Disorder

# Exploratory view on the unfiltered table: group means (left panel) and
# box plots (right panel) of the parenchyma share per kidney-disorder level.
# Two panels side by side, with an outer margin reserved for the shared title.
par(mfrow = c(1, 2), oma = c(1, 1, 2, 1), mar = c(2, 2, 2, 2))

# Means per group, labelled with two digits. TRUE is spelled out because T is
# an ordinary, reassignable variable.
plotmeans(
  Segmentation_ParenchymToVolume ~ Disease_Kidney_Disorder,
  data = ship,
  digits = 2,
  ccol = "red",
  mean.labels = TRUE,
  xlab = "Kidney Disorder",
  ylab = "Parenchyma Tissue"
)
## Warning in qt((1 + p)/2, ns - 1): NaNs produced
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
boxplot(
  Segmentation_ParenchymToVolume ~ Disease_Kidney_Disorder,
  data = ship,
  xlab = "Kidney Disorder",
  ylab = "parenchyma",
  col = rainbow(7)
)

# Shared headline drawn in the outer margin above both panels.
title(main = "Parenchyma Tissue by Kidney Disorder", outer = TRUE)

Remove subjects with an error code or no value for kidney disorder.

# Analyze only subjects with a valid kidney disorder entry: rows carrying the
# "99998" error code or an empty string are removed, then the factor levels
# that no longer occur are dropped so they do not show up in plots or the model.
ship_valid_kidney <- filter(
  ship,
  !(as.character(Disease_Kidney_Disorder) %in%
    c("99998 - Fehler, Daten nicht mehr erhebbar", ""))
)
ship_valid_kidney <- droplevels(ship_valid_kidney)

# Means plot (left) and box plot (right) on the cleaned subset.
par(mfrow = c(1, 2), oma = c(1, 1, 2, 1), mar = c(2, 2, 2, 2))
plotmeans(
  Segmentation_ParenchymToVolume ~ Disease_Kidney_Disorder,
  data = ship_valid_kidney,
  digits = 2,
  ccol = "red",
  mean.labels = TRUE,
  xlab = "kidney disorder",
  ylab = "parenchyma tissue"
)

boxplot(
  Segmentation_ParenchymToVolume ~ Disease_Kidney_Disorder,
  data = ship_valid_kidney,
  xlab = "kidney disorder",
  ylab = "parenchyma",
  col = rainbow(7)
)
title(main = "Parenchyma Tissue by Kidney Disorder", outer = TRUE)

# One-way ANOVA of the parenchyma share across kidney-disorder groups.
summary(aov(
  formula = Segmentation_ParenchymToVolume ~ Disease_Kidney_Disorder,
  data = ship_valid_kidney
))
##                           Df Sum Sq Mean Sq F value Pr(>F)
## Disease_Kidney_Disorder    1    121  121.02   2.663  0.103
## Residuals               1181  53681   45.45

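The ANOVA yields a fairly low F value (2.663) with a p value of 0.103. At the conventional significance level of 0.05, the null hypothesis therefore cannot be rejected, i.e. the data provide no evidence for an association between parenchyma tissue and kidney disorder.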

ANOVA of Parenchyma Tissue and Diabetes

# Exploratory view on the unfiltered table: group means (left panel) and
# box plots (right panel) of the parenchyma share per diabetes level.
par(mfrow = c(1, 2), oma = c(1, 1, 2, 1), mar = c(2, 2, 2, 2))
# TRUE is spelled out because T is an ordinary, reassignable variable.
plotmeans(
  Segmentation_ParenchymToVolume ~ Diabetes,
  data = ship,
  digits = 2,
  ccol = "red",
  mean.labels = TRUE,
  xlab = "diabetes",
  ylab = "parenchyma tissue"
)
## Warning in qt((1 + p)/2, ns - 1): NaNs produced
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
boxplot(
  Segmentation_ParenchymToVolume ~ Diabetes,
  data = ship,
  xlab = "diabetes",
  ylab = "parenchyma",
  col = rainbow(7)
)
# Shared headline drawn in the outer margin above both panels.
title(main = "Parenchyma Tissue by Diabetes", outer = TRUE)

# Filter the error values: drop rows whose diabetes entry is the "99998"
# error code or empty, then remove the factor levels that no longer occur.
ship_valid_diabetes <- filter(
  ship,
  !(as.character(Diabetes) %in%
    c("99998 - Fehler, Daten nicht mehr erhebbar", ""))
)
ship_valid_diabetes <- droplevels(ship_valid_diabetes)

# Create means and box plots on the cleaned subset
par(mfrow = c(1, 2), oma = c(1, 1, 2, 1), mar = c(2, 2, 2, 2))
plotmeans(
  Segmentation_ParenchymToVolume ~ Diabetes,
  data = ship_valid_diabetes,
  digits = 2,
  ccol = "red",
  mean.labels = TRUE,
  xlab = "diabetes",
  ylab = "parenchyma tissue"
)
boxplot(
  Segmentation_ParenchymToVolume ~ Diabetes,
  data = ship_valid_diabetes,
  xlab = "diabetes",
  ylab = "parenchyma",
  col = rainbow(7)
)
title(main = "Parenchyma Tissue by Diabetes", outer = TRUE)

# Calculate the one-way analysis of variance across diabetes groups
summary(aov(
  formula = Segmentation_ParenchymToVolume ~ Diabetes,
  data = ship_valid_diabetes
))
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## Diabetes       1    577   577.3    12.8 0.000361 ***
## Residuals   1181  53262    45.1                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Diabetes shows a statistically significant association with parenchyma tissue, with an F value of 12.8 and a p value of 0.0004.

ANOVA of Parenchyma Tissue and Hormone Replacement Therapy

At first, visualize the means and draw the box plots.

# Create means and box plot on the unfiltered table: parenchyma share per
# hormone-replacement-therapy level (means left, box plots right).
par(mfrow = c(1, 2), oma = c(1, 1, 2, 1), mar = c(2, 2, 2, 2))
# TRUE is spelled out because T is an ordinary, reassignable variable.
plotmeans(
  Segmentation_ParenchymToVolume ~ Women_Hormone_Replacement_Therapy,
  data = ship,
  digits = 2,
  ccol = "red",
  mean.labels = TRUE,
  xlab = "hormone replacement therapy",
  ylab = "parenchyma tissue"
)
## Warning in qt((1 + p)/2, ns - 1): NaNs produced
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
boxplot(
  Segmentation_ParenchymToVolume ~ Women_Hormone_Replacement_Therapy,
  data = ship,
  xlab = "hormone replacement therapy",
  ylab = "parenchyma tissue",
  col = rainbow(7)
)
# Shared headline drawn in the outer margin above both panels.
title(main = "Parenchyma Tissue by Hormone Replacement Therapy", outer = TRUE)

As before, remove the error values and conduct the ANOVA.

# Filter the error values: drop rows whose hormone-replacement-therapy entry
# is the "99998" error code or empty, then remove the unused factor levels.
ship_valid_hormone <- filter(
  ship,
  !(as.character(Women_Hormone_Replacement_Therapy) %in%
    c("99998 - Fehler, Daten nicht mehr erhebbar", ""))
)
ship_valid_hormone <- droplevels(ship_valid_hormone)

# Create the means and the box plots on the cleaned subset
par(mfrow = c(1, 2), oma = c(1, 1, 2, 1), mar = c(2, 2, 2, 2))
plotmeans(
  Segmentation_ParenchymToVolume ~ Women_Hormone_Replacement_Therapy,
  data = ship_valid_hormone,
  digits = 2,
  ccol = "red",
  mean.labels = TRUE,
  xlab = "hormone replacement therapy",
  ylab = "parenchyma tissue"
)
boxplot(
  Segmentation_ParenchymToVolume ~ Women_Hormone_Replacement_Therapy,
  data = ship_valid_hormone,
  xlab = "hormone replacement therapy",
  ylab = "parenchyma tissue",
  col = rainbow(7)
)
title(main = "Parenchyma Tissue by Hormone Replacement Therapy", outer = TRUE)

# Calculate the one-way analysis of variance across therapy groups
summary(aov(
  formula = Segmentation_ParenchymToVolume ~ Women_Hormone_Replacement_Therapy,
  data = ship_valid_hormone
))
##                                     Df Sum Sq Mean Sq F value   Pr(>F)    
## Women_Hormone_Replacement_Therapy    1   1915  1915.1   43.58 6.14e-11 ***
## Residuals                         1181  51901    43.9                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ANOVA for Parenchyma Tissue and Mammography Finding Classification

The mammography finding classification feature is categorical and defined individually for each breast. Therefore, the ANOVA is conducted for both the left and the right feature. First, plot both features.

# Exploratory view on the unfiltered table: parenchyma share per left-breast
# BI-RADS level (group means left, box plots right).
par(mfrow = c(1, 2), oma = c(1, 1, 2, 1), mar = c(2, 2, 2, 2))
# TRUE is spelled out because T is an ordinary, reassignable variable.
plotmeans(
  Segmentation_ParenchymToVolume ~ Mammography_Left_BI_RADS,
  data = ship,
  digits = 2,
  ccol = "red",
  mean.labels = TRUE,
  xlab = "Mammography Finding Classification (Left)",
  ylab = "parenchyma tissue"
)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
boxplot(
  Segmentation_ParenchymToVolume ~ Mammography_Left_BI_RADS,
  data = ship,
  xlab = "Mammography Finding Classification (Left)",
  ylab = "parenchyma tissue",
  col = rainbow(7)
)
# Shared headline drawn in the outer margin above both panels.
title(main = "Parenchyma Tissue by Mammography Finding Classification (Left)", outer = TRUE)

# Exploratory view on the unfiltered table: parenchyma share per right-breast
# BI-RADS level (group means left, box plots right).
par(mfrow = c(1, 2), oma = c(1, 1, 2, 1), mar = c(2, 2, 2, 2))
# TRUE is spelled out because T is an ordinary, reassignable variable.
plotmeans(
  Segmentation_ParenchymToVolume ~ Mammography_Right_BI_RADS,
  data = ship,
  digits = 2,
  ccol = "red",
  mean.labels = TRUE,
  xlab = "Mammography Finding Classification (Right)",
  ylab = "parenchyma tissue"
)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
boxplot(
  Segmentation_ParenchymToVolume ~ Mammography_Right_BI_RADS,
  data = ship,
  xlab = "Mammography Finding Classification (Right)",
  ylab = "parenchyma tissue",
  col = rainbow(7)
)
# Shared headline drawn in the outer margin above both panels.
title(main = "Parenchyma Tissue by Mammography Finding Classification (Right)", outer = TRUE)

Now remove the invalid values from both features and conduct the analysis.

# Filter Error Values
# Both the left and the right BI-RADS feature are analysed on this subset, so
# a subject is kept only when neither side carries the "99998" error code or
# an empty entry. Checking the right side alone would leave invalid left-side
# entries in the left-side plots and ANOVA.
# NOTE(review): if any subject has a valid right but invalid left entry, the
# group sizes and ANOVA figures recorded further down will change on re-run.
invalid_bi_rads <- c("99998 - Fehler, Daten nicht mehr erhebbar", "")
ship_valid_mammography <- filter(
  ship,
  !(as.character(Mammography_Left_BI_RADS) %in% invalid_bi_rads),
  !(as.character(Mammography_Right_BI_RADS) %in% invalid_bi_rads)
)
# Drop factor levels that no longer occur after filtering.
ship_valid_mammography <- droplevels(ship_valid_mammography)

# Create Box and Means Plot (Left)
# Parenchyma share per left-breast BI-RADS level on the filtered subset.
par(mfrow = c(1, 2), oma = c(1, 1, 2, 1), mar = c(2, 2, 2, 2))
# TRUE is spelled out because T is an ordinary, reassignable variable.
plotmeans(
  Segmentation_ParenchymToVolume ~ Mammography_Left_BI_RADS,
  data = ship_valid_mammography,
  digits = 2,
  ccol = "red",
  mean.labels = TRUE,
  xlab = "mammography finding classification (Left)",
  ylab = "parenchyma tissue"
)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
boxplot(
  Segmentation_ParenchymToVolume ~ Mammography_Left_BI_RADS,
  data = ship_valid_mammography,
  xlab = "mammography finding classification (Left)",
  ylab = "parenchyma tissue",
  col = rainbow(7)
)
title(main = "Parenchyma Tissue by Mammography Finding Classification (Left)", outer = TRUE)

# Create Box and Means Plot (Right)
# Same pair of panels for the right-breast BI-RADS level.
par(mfrow = c(1, 2), oma = c(1, 1, 2, 1), mar = c(2, 2, 2, 2))
plotmeans(
  Segmentation_ParenchymToVolume ~ Mammography_Right_BI_RADS,
  data = ship_valid_mammography,
  digits = 2,
  ccol = "red",
  mean.labels = TRUE,
  xlab = "mammography finding classification (Right)",
  ylab = "parenchyma tissue"
)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
boxplot(
  Segmentation_ParenchymToVolume ~ Mammography_Right_BI_RADS,
  data = ship_valid_mammography,
  xlab = "mammography finding classification (Right)",
  ylab = "parenchyma tissue",
  col = rainbow(7)
)
title(main = "Parenchyma Tissue by Mammography Finding Classification (Right)", outer = TRUE)

# Calculate the Analysis of Variance
# One one-way ANOVA per breast side, both on the same filtered subset.
summary(aov(
  formula = Segmentation_ParenchymToVolume ~ Mammography_Left_BI_RADS,
  data = ship_valid_mammography
))
##                           Df Sum Sq Mean Sq F value Pr(>F)  
## Mammography_Left_BI_RADS   4    459  114.63   2.549 0.0383 *
## Residuals                640  28780   44.97                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(
  formula = Segmentation_ParenchymToVolume ~ Mammography_Right_BI_RADS,
  data = ship_valid_mammography
))
##                            Df Sum Sq Mean Sq F value   Pr(>F)    
## Mammography_Right_BI_RADS   4    901  225.33   5.089 0.000484 ***
## Residuals                 640  28338   44.28                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ANOVA for Parenchyma Tissue and Breast Lesions

This is once again a categorical target feature which is derived for the left and right breast separately.

# Create Means and Box Plots (Left)
# Parenchyma share per left-breast lesion level. Axis labels and title name
# the lesion feature that is actually plotted, not the BI-RADS classification.
par(mfrow = c(1, 2), oma = c(1, 1, 2, 1), mar = c(2, 2, 2, 2))
# TRUE is spelled out because T is an ordinary, reassignable variable.
plotmeans(
  Segmentation_ParenchymToVolume ~ Mammography_Left_Lesion,
  data = ship_valid_mammography,
  digits = 2,
  ccol = "red",
  mean.labels = TRUE,
  xlab = "breast lesion (Left)",
  ylab = "parenchyma tissue"
)
boxplot(
  Segmentation_ParenchymToVolume ~ Mammography_Left_Lesion,
  data = ship_valid_mammography,
  xlab = "breast lesion (Left)",
  ylab = "parenchyma tissue",
  col = rainbow(7)
)
title(main = "Parenchyma Tissue by Breast Lesion (Left)", outer = TRUE)

# Create Means and Box Plots (Right)
par(mfrow = c(1, 2), oma = c(1, 1, 2, 1), mar = c(2, 2, 2, 2))
plotmeans(
  Segmentation_ParenchymToVolume ~ Mammography_Right_Lesion,
  data = ship_valid_mammography,
  digits = 2,
  ccol = "red",
  mean.labels = TRUE,
  xlab = "breast lesion (Right)",
  ylab = "parenchyma tissue"
)
boxplot(
  Segmentation_ParenchymToVolume ~ Mammography_Right_Lesion,
  data = ship_valid_mammography,
  xlab = "breast lesion (Right)",
  ylab = "parenchyma tissue",
  col = rainbow(7)
)
title(main = "Parenchyma Tissue by Breast Lesion (Right)", outer = TRUE)

# Calculate Analysis of Variance
# One one-way ANOVA per breast side on the mammography subset.
# NOTE(review): the lesion columns themselves are not filtered for error or
# empty entries here; confirm that the mammography subset contains none.
summary(aov(
  formula = Segmentation_ParenchymToVolume ~ Mammography_Left_Lesion,
  data = ship_valid_mammography
))
##                          Df Sum Sq Mean Sq F value Pr(>F)
## Mammography_Left_Lesion   2    137   68.36   1.508  0.222
## Residuals               642  29102   45.33
summary(aov(
  formula = Segmentation_ParenchymToVolume ~ Mammography_Right_Lesion,
  data = ship_valid_mammography
))
##                           Df Sum Sq Mean Sq F value Pr(>F)
## Mammography_Right_Lesion   2    126   63.18   1.393  0.249
## Residuals                642  29112   45.35

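Neither ANOVA is significant (p = 0.222 for the left and p = 0.249 for the right breast), so the null hypothesis cannot be rejected: the data show no evidence of an association between parenchyma tissue and breast lesion.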

ANOVA between Parenchyma Tissue and Spiral Contraception

# Create Means and Box Plots
# Exploratory view on the unfiltered table: parenchyma share per
# spiral-contraception level (group means left, box plots right).
par(mfrow = c(1, 2), oma = c(1, 1, 2, 1), mar = c(2, 2, 2, 2))
# TRUE is spelled out because T is an ordinary, reassignable variable.
plotmeans(
  Segmentation_ParenchymToVolume ~ MAMMA_Anamnesis_Contraception_Spiral,
  data = ship,
  digits = 2,
  ccol = "red",
  mean.labels = TRUE,
  xlab = "spiral contraception",
  ylab = "parenchyma tissue"
)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
boxplot(
  Segmentation_ParenchymToVolume ~ MAMMA_Anamnesis_Contraception_Spiral,
  data = ship,
  xlab = "spiral contraception",
  ylab = "parenchyma tissue",
  col = rainbow(7)
)
# Shared headline drawn in the outer margin above both panels.
title(main = "Parenchyma Tissue by Spiral Contraception", outer = TRUE)

Now remove the subjects without a valid contraception entry.

# Drop the Error Values
# Rows whose spiral-contraception entry is the "99998" error code or empty
# are removed; factor levels that no longer occur are dropped afterwards.
ship_valid_contraception <- filter(
  ship,
  !(as.character(MAMMA_Anamnesis_Contraception_Spiral) %in%
    c("99998 - Fehler, Daten nicht mehr erhebbar", ""))
)
ship_valid_contraception <- droplevels(ship_valid_contraception)

# Calculate Means and Box Plot on the cleaned subset
par(mfrow = c(1, 2), oma = c(1, 1, 2, 1), mar = c(2, 2, 2, 2))
# TRUE is spelled out because T is an ordinary, reassignable variable.
plotmeans(
  Segmentation_ParenchymToVolume ~ MAMMA_Anamnesis_Contraception_Spiral,
  data = ship_valid_contraception,
  digits = 2,
  ccol = "red",
  mean.labels = TRUE,
  xlab = "spiral contraception",
  ylab = "parenchyma tissue"
)
boxplot(
  Segmentation_ParenchymToVolume ~ MAMMA_Anamnesis_Contraception_Spiral,
  data = ship_valid_contraception,
  xlab = "spiral contraception",
  ylab = "parenchyma tissue",
  col = rainbow(7)
)
title(main = "Parenchyma Tissue by Spiral Contraception", outer = TRUE)

# Calculate Analysis of Variance
summary(aov(
  formula = Segmentation_ParenchymToVolume ~ MAMMA_Anamnesis_Contraception_Spiral,
  data = ship_valid_contraception
))
##                                       Df Sum Sq Mean Sq F value Pr(>F)
## MAMMA_Anamnesis_Contraception_Spiral   1     13   12.70   0.229  0.632
## Residuals                            483  26772   55.43