IRT without the normality assumption

library(IRTest)
#> Thank you for using IRTest!
#> Please cite the package as:
#> Li, S. (2022). IRTest: Parameter estimation of item response theory with estimation of latent distribution (Version 1.7.0). R package.
#> URL: https://CRAN.R-project.org/package=IRTest
library(ggplot2)




1. Dichotomous items

The function DataGeneration can be used for the pre-analysis step. This function returns a set of artificial data and some useful objects for analysis (e.g., theta, data_D, item_D, & initialitem_D).

In the parameter estimation process, initialitem_D can be used as an input to the function IRTest_Dich (i.e., initialitem = initialitem_D). data_D is an artificial item response data set that can be used for analyses such as computer simulations, but it is unnecessary if user-imported item response data are used. theta and item_D are not used in the estimation process, but they can serve as the true parameters if the artificial data (data_D) are used for an analysis.

# Simulate 1,000 examinees answering 10 dichotomous items (no polytomous
# items); items 1-5 use model code 1 and items 6-10 use model code 2.
# `d`, `sd_ratio`, and `prob` shape the "2NM" latent distribution
# (see ?DataGeneration).
Alldata <- DataGeneration(
  seed = 123456789,
  N = 1000,
  nitem_D = 10,
  nitem_P = 0,
  model_D = rep(1:2, each = 5),
  latent_dist = "2NM",
  d = 1.664,
  sd_ratio = 2,
  prob = 0.3
)

# Item responses, generating item parameters, and generating abilities.
data <- Alldata[["data_D"]]
item <- Alldata[["item_D"]]
theta <- Alldata[["theta"]]

# Plant missing responses: the first 500 examinees skip item 1 and the
# last 500 skip item 2.
data[seq_len(500), 1] <- NA
data[501:1000, 2] <- NA

If the artificial data (data_D) is used, the true latent distribution looks like this:




# Fit model code 2 to every item while estimating the latent distribution
# with the "LLS" method; `h = 4` is forwarded to that estimation
# (see ?IRTest_Dich).
Mod1 <- IRTest_Dich(
  data = data,
  model = 2,
  latent_dist = "LLS",
  h = 4
)




### Summary
# Prints the convergence status, model-fit indices (deviance, AIC, BIC),
# parameter and item counts, and a text sketch of the estimated latent
# distribution.
summary(Mod1)
#> Convergence:  
#> Successfully converged below the threshold of 1e-04 on 73rd iterations. 
#> 
#> Model Fit:  
#>    deviance   9130.173 
#>         AIC   9178.173 
#>         BIC   9295.959 
#> 
#> The Number of Parameters:  
#>        item   20 
#>        dist   4 
#>       total   24 
#> 
#> The Number of Items:  
#> dichotomous   10 
#> polyotomous   0 
#> 
#> The Estimated Latent Distribution:  
#> method - LLS 
#> ----------------------------------------
#>                                           
#>                                           
#>                                           
#>                                           
#>           . . .       . . . @ .           
#>         @ @ @ @ @ @ @ @ @ @ @ @ @         
#>       @ @ @ @ @ @ @ @ @ @ @ @ @ @ @       
#>     . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ .     
#>     @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @     
#>   @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @   
#> +---------+---------+---------+---------+
#> -2        -1        0         1         2

### The estimated item parameters
# One row per item; the columns are a, b, and c.
Mod1$par_est
#>               a           b c
#>  [1,] 0.6918262 -0.07928281 0
#>  [2,] 0.9383418  0.37278883 0
#>  [3,] 1.0179276 -1.55407024 0
#>  [4,] 1.1022914 -1.05543143 0
#>  [5,] 1.0774381 -1.73147120 0
#>  [6,] 1.9863217  0.45700688 0
#>  [7,] 2.0410894 -1.63161594 0
#>  [8,] 2.0560120 -0.49101479 0
#>  [9,] 1.9277635 -0.50771321 0
#> [10,] 2.3390683  0.25528127 0

### The asymptotic standard errors of item parameters
# Same row/column layout as `Mod1$par_est`; the c column is NA in this output.
Mod1$se
#>                a          b  c
#>  [1,] 0.11643444 0.15353537 NA
#>  [2,] 0.11073882 0.12959651 NA
#>  [3,] 0.09593797 0.12850705 NA
#>  [4,] 0.09010128 0.08861957 NA
#>  [5,] 0.10475036 0.13844368 NA
#>  [6,] 0.12350916 0.04375507 NA
#>  [7,] 0.19009790 0.08032865 NA
#>  [8,] 0.12700645 0.04341767 NA
#>  [9,] 0.12024177 0.04550756 NA
#> [10,] 0.14008630 0.03870131 NA

### The estimated ability parameters
# Generating abilities on the x-axis, estimates on the y-axis; the
# identity line (intercept 0, slope 1) is drawn for reference.
plot(x = theta, y = Mod1$theta)
abline(a = 0, b = 1)

### The estimated latent distribution
# Fix the y-axis range to [0, 0.5].
plot(Mod1) + ylim(0, 0.5)

### Item fit statistics (stat, df, and p-value for each item)
item_fit(Mod1)
#> Insufficient data values to produce 10 bins for Item 1. 9 bins will be used.
#> Insufficient data values to produce 9 bins for Item 1. 8 bins will be used.
#> Insufficient data values to produce 8 bins for Item 1. 7 bins will be used.
#> Insufficient data values to produce 7 bins for Item 1. 6 bins will be used.
#>        stat df p.value
#> 1  52.59557  3  0.0000
#> 2  19.33040  7  0.0072
#> 3  33.84115  7  0.0000
#> 4  25.45106  7  0.0006
#> 5  30.08644  7  0.0001
#> 6  76.65091  7  0.0000
#> 7  31.18813  7  0.0001
#> 8  29.19976  7  0.0001
#> 9  42.28145  7  0.0000
#> 10 50.04304  7  0.0000
### Test reliability
reliability(Mod1)
#> test reliability 
#>        0.7740163

2. Polytomous items

As in the case of dichotomous items, the function DataGeneration can be used for the pre-analysis step. This function returns a set of artificial data and some useful objects for analysis (e.g., theta, data_P, item_P, & initialitem_P).

In the parameter estimation process, initialitem_P can be used as an input to the function IRTest_Poly (i.e., initialitem = initialitem_P). data_P is an artificial item response data set that can be used for analyses such as computer simulations, but it is unnecessary if user-imported item response data are used. theta and item_P are not used in the estimation process, but they can serve as the true parameters if the artificial data (data_P) are used for an analysis.

# Simulate 1,000 examinees answering 10 polytomous "GPCM" items (no
# dichotomous items): items 1-5 are generated with 3 response categories
# and items 6-10 with 7. `d`, `sd_ratio`, and `prob` shape the "2NM"
# latent distribution (see ?DataGeneration).
Alldata <- DataGeneration(
  seed = 123456789,
  N = 1000,
  nitem_D = 0,
  nitem_P = 10,
  model_P = "GPCM",
  categ = rep(c(3, 7), each = 5),
  latent_dist = "2NM",
  d = 1.414,
  sd_ratio = 2,
  prob = 0.5
)

# Item responses, generating item parameters, and generating abilities.
data <- Alldata[["data_P"]]
item <- Alldata[["item_P"]]
theta <- Alldata[["theta"]]

# Plant missing responses: the first 500 examinees skip items 1-3 and the
# last 500 skip items 4-6.
data[seq_len(500), 1:3] <- NA
data[501:1000, 4:6] <- NA

If the artificial data (data_P) is used, the true latent distribution looks like this:




# Fit the "GPCM" model to the polytomous responses while estimating the
# latent distribution with the "KDE" method.
Mod1 <- IRTest_Poly(
  data = data,
  model = "GPCM",
  latent_dist = "KDE"
)




### Summary
# Prints the convergence status, model-fit indices (deviance, AIC, BIC),
# parameter and item counts, and a text sketch of the estimated latent
# distribution.
summary(Mod1)
#> Convergence:  
#> Successfully converged below the threshold of 1e-04 on 29th iterations. 
#> 
#> Model Fit:  
#>    deviance   17550.31 
#>         AIC   17650.31 
#>         BIC   17895.69 
#> 
#> The Number of Parameters:  
#>        item   49 
#>        dist   1 
#>       total   50 
#> 
#> The Number of Items:  
#> dichotomous   0 
#> polyotomous   10 
#> 
#> The Estimated Latent Distribution:  
#> method - KDE 
#> ----------------------------------------
#>                                           
#>               . .                         
#>             . @ @ .                       
#>             @ @ @ @ .                     
#>           @ @ @ @ @ @ .                   
#>         . @ @ @ @ @ @ @ .                 
#>         @ @ @ @ @ @ @ @ @ @ .             
#>       . @ @ @ @ @ @ @ @ @ @ @ @ @ @ .     
#>     . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ .   
#>   . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ 
#> +---------+---------+---------+---------+
#> -2        -1        0         1         2

### The estimated item parameters
# One row per item; the columns are a and b_1 through b_6. NA marks a
# parameter that was not estimated for that item.
Mod1$par_est
#>               a          b_1        b_2          b_3         b_4         b_5
#>  [1,] 1.7875216  0.768557011  1.5465558           NA          NA          NA
#>  [2,] 0.7641532 -0.227304122  0.8418435           NA          NA          NA
#>  [3,] 0.6481532 -0.005371191  1.2849516           NA          NA          NA
#>  [4,] 1.6588070 -0.945807752  0.9394254           NA          NA          NA
#>  [5,] 2.2530632 -2.195167615 -2.0151652           NA          NA          NA
#>  [6,] 1.2353060 -0.594188007  0.3089519  0.598282406  2.93402549  0.68958962
#>  [7,] 1.4523293 -3.049821033 -1.9487927 -1.202220957 -0.92560041  0.08132396
#>  [8,] 0.8574999 -1.390253736 -0.8853679 -0.365115011 -0.04109078 -0.13440845
#>  [9,] 1.5337358 -1.016578133 -0.4014641  0.001875338  0.38574134  0.54652582
#> [10,] 1.2721265 -1.026096753  0.3073373  0.388904895  0.70808745  1.43425458
#>             b_6
#>  [1,]        NA
#>  [2,]        NA
#>  [3,]        NA
#>  [4,]        NA
#>  [5,]        NA
#>  [6,]        NA
#>  [7,] 0.5844565
#>  [8,] 0.3924430
#>  [9,] 0.8937754
#> [10,] 1.4746911

### The asymptotic standard errors of item parameters
# Same row/column layout as `Mod1$par_est`, with NA in the same positions.
Mod1$se
#>                a        b_1        b_2        b_3        b_4       b_5
#>  [1,] 0.14797998 0.07150794 0.07980719         NA         NA        NA
#>  [2,] 0.08139314 0.15965897 0.14218502         NA         NA        NA
#>  [3,] 0.07631251 0.17392515 0.17304300         NA         NA        NA
#>  [4,] 0.18097518 0.06426531 0.15622793         NA         NA        NA
#>  [5,] 0.34689261 0.20947806 0.16496940         NA         NA        NA
#>  [6,] 0.12884087 0.08725042 0.13210795 0.18805008 0.86173450 1.1727244
#>  [7,] 0.07665161 0.42420195 0.14798194 0.09322249 0.07477916 0.0695550
#>  [8,] 0.04763529 0.16463904 0.14333983 0.14274591 0.15090036 0.1460057
#>  [9,] 0.08091761 0.07218241 0.07169808 0.08186487 0.09609836 0.1002510
#> [10,] 0.06945961 0.07474044 0.08939833 0.10578974 0.11570251 0.1288563
#>              b_6
#>  [1,]         NA
#>  [2,]         NA
#>  [3,]         NA
#>  [4,]         NA
#>  [5,]         NA
#>  [6,]         NA
#>  [7,] 0.07437509
#>  [8,] 0.12931705
#>  [9,] 0.09122916
#> [10,] 0.13744175

### The estimated ability parameters
# Generating abilities on the x-axis, estimates on the y-axis; the
# identity line (intercept 0, slope 1) is drawn for reference.
plot(x = theta, y = Mod1$theta)
abline(a = 0, b = 1)

### The estimated latent distribution
# Fix the y-axis range to [0, 0.5].
plot(Mod1) + ylim(0, 0.5)

### Item fit statistics (stat, df, and p-value for each item)
item_fit(Mod1)
#>        stat df p.value
#> 1  22.49394 15  0.0955
#> 2  14.15362 15  0.5139
#> 3  17.37764 15  0.2968
#> 4  24.17928 15  0.0621
#> 5  23.89154 15  0.0670
#> 6  39.84661 39  0.4323
#> 7  81.99800 47  0.0012
#> 8  59.11891 47  0.1105
#> 9  58.43121 47  0.1225
#> 10 43.20066 47  0.6307
### Test reliability
reliability(Mod1)
#> test reliability 
#>        0.9158039

3. Mixed-format test

As in the case of dichotomous and polytomous items, the function DataGeneration can be used for the pre-analysis step. This function returns artificial data and some useful objects for analysis (i.e., theta, data_D, item_D, initialitem_D, data_P, item_P, & initialitem_P).

In the parameter estimation process, initialitem_D and initialitem_P can be used as inputs to the function IRTest_Mix (i.e., initialitem_D = initialitem_D, & initialitem_P = initialitem_P). data_D and data_P are artificial item response data sets that can be used for analyses such as computer simulations, but they are unnecessary if user-imported item response data are used. theta, item_D, and item_P are not used in the estimation process, but they can serve as the true parameters if the artificial data (data_D & data_P) are used for an analysis.

# Simulate 1,000 examinees answering a mixed-format test: 5 dichotomous
# items (model code 2) and 5 polytomous "GPCM" items generated with 5
# response categories each. `d`, `sd_ratio`, and `prob` shape the "2NM"
# latent distribution (see ?DataGeneration).
Alldata <- DataGeneration(
  seed = 12345678,
  N = 1000,
  nitem_D = 5,
  nitem_P = 5,
  model_D = rep(2, 5),
  model_P = "GPCM",
  categ = rep(5, 5),
  latent_dist = "2NM",
  d = 1.664,
  sd_ratio = 1,
  prob = 0.5
)

# Item responses and generating item parameters for each item type, plus
# the generating abilities.
DataD <- Alldata[["data_D"]]
DataP <- Alldata[["data_P"]]
itemD <- Alldata[["item_D"]]
itemP <- Alldata[["item_P"]]
theta <- Alldata[["theta"]]

# Plant missing responses so that each quarter of the sample skips one
# item: dichotomous items 1 and 2 for rows 1-250 and 251-500, polytomous
# items 1 and 2 for rows 501-750 and 751-1000.
DataD[seq_len(250), 1] <- NA
DataD[251:500, 2] <- NA
DataP[501:750, 1] <- NA
DataP[751:1000, 2] <- NA

If the artificial data (data_D & data_P) are used, the true latent distribution looks like this:

#> Scale for y is already present.
#> Adding another scale for y, which will replace the existing scale.




# Jointly fit the "2PL" model to the dichotomous items and the "GPCM"
# model to the polytomous items while estimating the latent distribution
# with the "KDE" method.
Mod1 <- IRTest_Mix(
  data_D = DataD,
  data_P = DataP,
  model_D = "2PL",
  model_P = "GPCM",
  latent_dist = "KDE"
)




### Summary
# Prints the convergence status, model-fit indices (deviance, AIC, BIC),
# parameter and item counts, and a text sketch of the estimated latent
# distribution.
summary(Mod1)
#> Convergence:  
#> Successfully converged below the threshold of 1e-04 on 35th iterations. 
#> 
#> Model Fit:  
#>    deviance   1766382 
#>         AIC   1766454 
#>         BIC   1766631 
#> 
#> The Number of Parameters:  
#>        item   35 
#>        dist   1 
#>       total   36 
#> 
#> The Number of Items:  
#> dichotomous   5 
#> polyotomous   5 
#> 
#> The Estimated Latent Distribution:  
#> method - KDE 
#> ----------------------------------------
#>                                           
#>                                           
#>                                           
#>             @ @             .             
#>           @ @ @ @         @ @ @           
#>         . @ @ @ @ @ . . @ @ @ @ @         
#>         @ @ @ @ @ @ @ @ @ @ @ @ @ .       
#>       @ @ @ @ @ @ @ @ @ @ @ @ @ @ @       
#>     . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @     
#>   . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @   
#> +---------+---------+---------+---------+
#> -2        -1        0         1         2

### The estimated item parameters
# A list with one matrix per item type: $Dichotomous (columns a, b, c) and
# $Polytomous (columns a, b_1 through b_6). NA marks a parameter that was
# not estimated for that item.
Mod1$par_est
#> $Dichotomous
#>              a          b c
#> [1,] 1.5821249 -0.7547435 0
#> [2,] 0.9794597 -1.5943777 0
#> [3,] 0.8501268  0.7680404 0
#> [4,] 0.9688784 -0.9614223 0
#> [5,] 1.4944181 -1.3590145 0
#> 
#> $Polytomous
#>             a        b_1         b_2        b_3       b_4 b_5 b_6
#> [1,] 2.322153 -0.7166374  0.75456780  1.5477052 2.7267412  NA  NA
#> [2,] 1.029950 -0.6745269 -0.75486611  0.6870106 1.1195071  NA  NA
#> [3,] 2.488694 -0.1582097  0.02776561  1.0515812 1.2225348  NA  NA
#> [4,] 1.916094 -1.8030428 -0.34302398 -0.1287233 0.2492093  NA  NA
#> [5,] 1.961237 -0.4699263 -0.15012932  0.8388752 1.1759573  NA  NA

### The asymptotic standard errors of item parameters
# Same list structure as `Mod1$par_est`; NA appears where no standard
# error is reported.
Mod1$se
#> $Dichotomous
#>               a          b  c
#> [1,] 0.12876313 0.07623034 NA
#> [2,] 0.11110379 0.18286320 NA
#> [3,] 0.07721960 0.09743837 NA
#> [4,] 0.08380408 0.09511242 NA
#> [5,] 0.12467429 0.08327815 NA
#> 
#> $Polytomous
#>               a        b_1        b_2        b_3        b_4 b_5 b_6
#> [1,] 0.14781076 0.04331630 0.05904431 0.08494950 0.27118601  NA  NA
#> [2,] 0.06825487 0.11285246 0.10874430 0.11949196 0.14430172  NA  NA
#> [3,] 0.13384192 0.05041735 0.05173368 0.05111369 0.05795802  NA  NA
#> [4,] 0.10416674 0.07999619 0.06179259 0.06573655 0.06047707  NA  NA
#> [5,] 0.10426523 0.05592471 0.05795963 0.05861632 0.06500365  NA  NA

### The estimated ability parameters
# Generating abilities on the x-axis, estimates on the y-axis; the
# identity line (intercept 0, slope 1) is drawn for reference.
plot(x = theta, y = Mod1$theta)
abline(a = 0, b = 1)

### The estimated latent distribution
# Fix the y-axis range to [0, 0.5].
plot(Mod1) + ylim(0, 0.5)

### Item fit statistics (stat, df, and p-value), reported separately for
### dichotomous and polytomous items
item_fit(Mod1)
#> $Dichotomous
#>        stat df p.value
#> 1 11.045242  7  0.1367
#> 2  6.643194  7  0.4670
#> 3  6.371348  7  0.4971
#> 4 11.931981  7  0.1028
#> 5 13.343816  7  0.0642
#> 
#> $Polytomous
#>       stat df p.value
#> 1 31.47649 31  0.4424
#> 2 38.48105 31  0.1670
#> 3 49.70119 31  0.0179
#> 4 38.54354 31  0.1653
#> 5 41.32231 31  0.1018
### Test reliability
reliability(Mod1)
#> test reliability 
#>        0.9156917