From 19def1ff013d5ebda03c77b9a571cd5a26654c53 Mon Sep 17 00:00:00 2001
From: matt5mitchell <62366974+matt5mitchell@users.noreply.github.com>
Date: Fri, 3 Sep 2021 12:49:05 -0700
Subject: [PATCH] Added different entropy calculations

Added new "type" argument, allowing users to select either the "classic" poLCA entropy statistic or the statistics calculated by Mplus or LatentGold. LatentGold actually has two entropy statistics: a basic entropy calculation and an entropy-based R-squared statistic
---
 R/poLCA.entropy.R | 55 +++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/R/poLCA.entropy.R b/R/poLCA.entropy.R
index d3e86a8..6cd9c9f 100644
--- a/R/poLCA.entropy.R
+++ b/R/poLCA.entropy.R
@@ -1,7 +1,48 @@
-poLCA.entropy <-
-function(lc) {
-    K.j <- sapply(lc$probs,ncol)
-    fullcell <- expand.grid(lapply(K.j,seq,from=1))
-    P.c <- poLCA.predcell(lc,fullcell)
-    return(-sum(P.c * log(P.c),na.rm=TRUE))
-}
+# Recommended update to poLCA.entropy function
+
+poLCA.entropy <- 
+  function(lc, type = c("poLCA", "Mplus", "LGold", "LGoldR2")) {
+    
+    type <- match.arg(type)
+    
+    # Base entropy function
+    entropy <- function(x) {-sum(x * log(x), na.rm = TRUE)}
+    
+    ## "Classic" poLCA entropy ##
+    K.j <- sapply(lc$probs,ncol)
+    fullcell <- expand.grid(lapply(K.j,seq,from=1))
+    P.c <- poLCA.predcell(lc,fullcell)
+    
+    # "Classic" poLCA
+    entropy.polca <- entropy(P.c)
+    
+    ## Mplus and LatentGold entropy statistics ##
+    machine.tolerance <- sqrt(.Machine$double.eps)
+    n <- lc$N # number of observations
+    k <- length(lc$P) #number of classes
+    
+    prob.df <- data.frame(lc$posterior) # posterior probabilities of existing combinations of indicators
+    prob.unpivot <- stack(prob.df) # unpivot probabilities
+    prob <- subset(prob.unpivot, subset = values > machine.tolerance, select = values) # filter near-zero values since Limit{p->0} -p*log(p) = 0
+    
+    # Mplus
+    entropy.mplus <- 1 - (entropy(prob) / (n * log(k)))
+    
+    # LatentGold
+    entropy.lgold <- entropy(prob)
+    
+    # LatentGold entropy-based R-squared
+    error <- entropy.lgold / n # Standardized entropy as "error"
+    error.total <- sum(sapply(lc$P, entropy))
+    entropy.R2.lgold <- (error.total - error) / error.total
+    
+    result <- switch(
+      type,
+      poLCA = entropy.polca,
+      Mplus = entropy.mplus,
+      LGold = entropy.lgold,
+      LGoldR2 = entropy.R2.lgold
+    )
+    
+    return(result)
+  }