Skip to content

Commit 256e474

Browse files
committed
2 parents bfb5a71 + 7c633c7 commit 256e474

12 files changed

+266
-75
lines changed

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ Suggests:
110110
cluster,
111111
cobalt,
112112
coda,
113+
correlation,
113114
coxme,
114115
cplm,
115116
curl,
@@ -228,4 +229,4 @@ Config/testthat/parallel: true
228229
Config/Needs/website: easystats/easystatstemplate
229230
Config/Needs/check: stan-dev/cmdstanr
230231
Config/rcmdcheck/ignore-inconsequential-notes: true
231-
Remotes: easystats/insight, easystats/bayestestR
232+
Remotes: easystats/insight, easystats/bayestestR, easystats/correlation

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ S3method(equivalence_test,rma)
151151
S3method(equivalence_test,wbm)
152152
S3method(equivalence_test,zeroinfl)
153153
S3method(factor_analysis,data.frame)
154+
S3method(factor_analysis,matrix)
154155
S3method(factor_scores,fa)
155156
S3method(factor_scores,omega)
156157
S3method(factor_scores,parameters_efa)

NEWS.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,22 @@
44

55
* The `standardize` argument in `factor_analysis()` now defaults to `FALSE`.
66

7+
* The `rotation` argument in `factor_analysis()` now defaults to `"oblimin"`,
8+
because the former default of `"none"` rarely makes sense in the context of
9+
factor analysis. If you want to use no rotation, please set `rotation =
10+
"none"`.
11+
712
* The `cor` argument in `n_factors()` was renamed to `correlation_matrix`. In
813
`factor_analysis()`, the `cor` argument was completely removed to avoid naming
914
collision with the `cor` argument of `psych::fa()`. Users can now pass
1015
the `cor` argument to `psych::fa()` when using `factor_analysis()`.
1116

1217
## Changes
1318

19+
* `factor_analysis()` gets a `.matrix` method, including the argument `n_obs`
20+
(an integer or a matrix), to compute factor analysis for a correlation matrix or covariance
21+
matrix.
22+
1423
* New function `factor_scores()` to extract factor scores from EFA (`psych::fa()`
1524
or `factor_analysis()`).
1625

R/factor_analysis.R

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ factor_analysis <- function(x, ...) {
99
#' @export
1010
factor_analysis.data.frame <- function(x,
1111
n = "auto",
12-
rotation = "none",
12+
rotation = "oblimin",
13+
factor_method = "minres",
1314
sort = FALSE,
1415
threshold = NULL,
1516
standardize = FALSE,
@@ -31,7 +32,7 @@ factor_analysis.data.frame <- function(x,
3132

3233
# FA
3334
out <- model_parameters(
34-
psych::fa(x, nfactors = n, rotate = rotation, ...),
35+
psych::fa(x, nfactors = n, rotate = rotation, fm = factor_method, ...),
3536
threshold = threshold,
3637
sort = sort,
3738
...
@@ -42,6 +43,56 @@ factor_analysis.data.frame <- function(x,
4243
}
4344

4445

46+
#' @rdname principal_components
#' @export
factor_analysis.matrix <- function(x,
                                   n = "auto",
                                   rotation = "oblimin",
                                   factor_method = "minres",
                                   n_obs = NULL,
                                   sort = FALSE,
                                   threshold = NULL,
                                   standardize = FALSE,
                                   ...) {
  # A square input is taken to be a correlation matrix. In that case the
  # number of observations has to be supplied explicitly via `n_obs`.
  if ((nrow(x) == ncol(x)) && is.null(n_obs)) {
    insight::format_error(
      "You provided a square matrix, which is assumed to be a correlation matrix. Please specify the number of observations with `n_obs`. If your matrix is not a correlation matrix, please provide a data frame instead."
    )
  }

  # `n_obs` is either a single count (forwarded as `n.obs`) or a matrix of
  # counts (forwarded as `np.obs`). Whenever no single count is available,
  # `NA` is forwarded as `n.obs` instead of `NULL`.
  pairwise_n <- NULL
  if (is.matrix(n_obs)) {
    # a matrix of counts must line up cell by cell with the input matrix
    if (nrow(n_obs) != nrow(x) || ncol(n_obs) != ncol(x)) {
      insight::format_error(
        "The provided `n_obs` matrix must have the same dimensions as the input matrix."
      )
    }
    pairwise_n <- n_obs
    n_obs <- NA
  } else if (is.null(n_obs)) {
    n_obs <- NA
  }

  # the actual work is delegated to the data frame method; `n.obs` and
  # `np.obs` are handed over as additional arguments
  factor_analysis.data.frame(
    x,
    n = n,
    rotation = rotation,
    factor_method = factor_method,
    sort = sort,
    threshold = threshold,
    standardize = standardize,
    n.obs = n_obs,
    np.obs = pairwise_n,
    ...
  )
}
94+
95+
4596
# Returns `TRUE` if `rotation` names one of the oblique rotation methods
# listed below, and `FALSE` otherwise (including when `rotation` is `NULL`).
# The comparison is case-insensitive: the input is lowercased, so every entry
# in the lookup set must be lowercase as well. The previous mixed-case entries
# "bentlerQ" and "geominQ" could never match a lowercased input.
.is_oblique_rotation <- function(rotation) {
  oblique_rotations <- c(
    "promax", "oblimin", "simplimax", "bentlerq", "geominq", "biquartimin",
    "cluster"
  )
  !is.null(rotation) && tolower(rotation) %in% oblique_rotations
}

R/principal_components.R

Lines changed: 41 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
#' Principal Component Analysis (PCA) and Factor Analysis (FA)
22
#'
3-
#' The functions `principal_components()` and `factor_analysis()` can
4-
#' be used to perform a principal component analysis (PCA) or a factor analysis
5-
#' (FA). They return the loadings as a data frame, and various methods and
6-
#' functions are available to access / display other information (see the
7-
#' Details section).
3+
#' The functions `principal_components()` and `factor_analysis()` can be used to
4+
#' perform a principal component analysis (PCA) or a factor analysis (FA). They
5+
#' return the loadings as a data frame, and various methods and functions are
6+
#' available to access / display other information (see the 'Details' section).
87
#'
9-
#' @param x A data frame or a statistical model.
8+
#' @param x A data frame or a statistical model. For `closest_component()` and
9+
#' `rotated_data()`, the output of the `principal_components()` function.
1010
#' @param n Number of components to extract. If `n="all"`, then `n` is set as
1111
#' the number of variables minus 1 (`ncol(x)-1`). If `n="auto"` (default) or
1212
#' `n=NULL`, the number of components is selected through [`n_factors()`]
@@ -19,12 +19,29 @@
1919
#' @param rotation If not `"none"`, the PCA / FA will be computed using the
2020
#' **psych** package. Possible options include `"varimax"`, `"quartimax"`,
2121
#' `"promax"`, `"oblimin"`, `"simplimax"`, or `"cluster"` (and more). See
22-
#' [`psych::fa()`] for details.
22+
#' [`psych::fa()`] for details. The default is `"none"` for PCA, and
23+
#' `"oblimin"` for FA.
24+
#' @param factor_method The factoring method to be used. Passed to the `fm`
25+
#' argument in `psych::fa()`. Defaults to `"minres"` (minimum residual). Other
26+
#' options include `"uls"`, `"ols"`, `"wls"`, `"gls"`, `"ml"`, `"minchi"`,
27+
#' `"minrank"`, `"old.min"`, and `"alpha"`. See `?psych::fa` for details.
2328
#' @param sparse Whether to compute sparse PCA (SPCA, using [`sparsepca::spca()`]).
2429
#' SPCA attempts to find sparse loadings (with few nonzero values), which improves
2530
#' interpretability and avoids overfitting. Can be `TRUE` or `"robust"` (see
2631
#' [`sparsepca::robspca()`]).
2732
#' @param sort Sort the loadings.
33+
#' @param n_obs An integer or a matrix.
34+
#' - **Integer:** Number of observations in the original data set if `x` is a
35+
#' correlation matrix. Required to compute correct fit indices.
36+
#' - **Matrix:** A matrix where each cell `[i, j]` specifies the number of
37+
#' pairwise complete observations used to compute the correlation between
38+
#' variable `i` and variable `j` in the input `x`. It is crucial when `x` is
39+
#' a correlation matrix (rather than raw data), especially if that matrix
40+
#' was derived from a dataset containing missing values using pairwise
41+
#' deletion. Providing a matrix allows `psych::fa()` to accurately calculate
42+
#' statistical measures, such as chi-square fit statistics, by accounting
43+
#' for the varying sample sizes that contribute to each individual
44+
#' correlation coefficient.
2845
#' @param threshold A value between 0 and 1 indicates which (absolute) values
2946
#' from the loadings should be removed. An integer higher than 1 indicates the
3047
#' n strongest loadings to retain. Can also be `"max"`, in which case it will
@@ -46,7 +63,6 @@
4663
#' with missing values from the original data, hence the number of rows of
4764
#' predicted data and original data is equal.
4865
#' @param ... Arguments passed to or from other methods.
49-
#' @param pca_results The output of the `principal_components()` function.
5066
#' @param digits Argument for `print()`, indicates the number of digits
5167
#' (rounding) to be used.
5268
#' @param labels Argument for `print()`, character vector of same length as
@@ -83,7 +99,7 @@
8399
#' values, so it matches the original data frame.
84100
#'
85101
#' - `performance::item_omega()` is a convenient wrapper around `psych::omega()`,
86-
#' which provides some additioal methods to work seamleassly within the
102+
#' which provides some additional methods to work seamlessly within the
87103
#' *easystats* framework.
88104
#'
89105
#' - [`performance::check_normality()`] checks residuals from objects returned
@@ -134,14 +150,15 @@
134150
#'
135151
#' ## Computing Item Scores
136152
#' Use [`get_scores()`] to compute scores for the "subscales" represented by the
137-
#' extracted principal components. `get_scores()` takes the results from
138-
#' `principal_components()` and extracts the variables for each component found
139-
#' by the PCA. Then, for each of these "subscales", raw means are calculated
140-
#' (which equals adding up the single items and dividing by the number of items).
141-
#' This results in a sum score for each component from the PCA, which is on the
142-
#' same scale as the original, single items that were used to compute the PCA.
143-
#' One can also use `predict()` to back-predict scores for each component,
144-
#' to which one can provide `newdata` or a vector of `names` for the components.
153+
#' extracted principal components or factors. `get_scores()` takes the results
154+
#' from `principal_components()` or `factor_analysis()` and extracts the
155+
#' variables for each component found by the PCA. Then, for each of these
156+
#' "subscales", raw means are calculated (which equals adding up the single
157+
#' items and dividing by the number of items). This results in a sum score for
158+
#' each component from the PCA, which is on the same scale as the original,
159+
#' single items that were used to compute the PCA. One can also use `predict()`
160+
#' to back-predict scores for each component, to which one can provide `newdata`
161+
#' or a vector of `names` for the components.
145162
#'
146163
#' ## Explained Variance and Eigenvalues
147164
#' Use `summary()` to get the Eigenvalues and the explained variance for each
@@ -213,9 +230,9 @@
213230
#'
214231
#' # Factor Analysis (FA) ------------------------
215232
#'
216-
#' factor_analysis(mtcars[, 1:7], n = "all", threshold = 0.2)
217-
#' factor_analysis(mtcars[, 1:7], n = 2, rotation = "oblimin", threshold = "max", sort = TRUE)
218-
#' factor_analysis(mtcars[, 1:7], n = 2, threshold = 2, sort = TRUE)
233+
#' factor_analysis(mtcars[, 1:7], n = "all", threshold = 0.2, rotation = "Promax")
234+
#' factor_analysis(mtcars[, 1:7], n = 2, threshold = "max", sort = TRUE)
235+
#' factor_analysis(mtcars[, 1:7], n = 2, rotation = "none", threshold = 2, sort = TRUE)
219236
#'
220237
#' efa <- factor_analysis(mtcars[, 1:5], n = 2)
221238
#' summary(efa)
@@ -234,9 +251,9 @@ principal_components <- function(x, ...) {
234251

235252
#' @rdname principal_components
236253
#' @export
237-
rotated_data <- function(pca_results, verbose = TRUE) {
238-
original_data <- attributes(pca_results)$dataset
239-
rotated_matrix <- insight::get_predicted(attributes(pca_results)$model)
254+
rotated_data <- function(x, verbose = TRUE) {
255+
original_data <- attributes(x)$dataset
256+
rotated_matrix <- insight::get_predicted(attributes(x)$model)
240257
out <- NULL
241258

242259
if (is.null(original_data) || is.null(rotated_matrix)) {
@@ -246,7 +263,7 @@ rotated_data <- function(pca_results, verbose = TRUE) {
246263
return(NULL)
247264
}
248265

249-
compl_cases <- attributes(pca_results)$complete_cases
266+
compl_cases <- attributes(x)$complete_cases
250267
if (is.null(compl_cases) && nrow(original_data) != nrow(rotated_matrix)) {
251268
if (verbose) {
252269
insight::format_warning("Could not retrieve information about missing data.")

R/utils_pca_efa.R

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
#' Get Scores from Principal Component Analysis (PCA)
1+
#' Get Scores from Principal Component or Factor Analysis (PCA/FA)
22
#'
33
#' `get_scores()` takes `n_items` amount of items that load the most
44
#' (either by loading cutoff or number) on a component, and then computes their
5-
#' average.
5+
#' average. This results in a sum score for each component from the PCA/FA,
6+
#' which is on the same scale as the original, single items that were used to
7+
#' compute the PCA/FA.
68
#'
7-
#' @param x An object returned by [principal_components()].
9+
#' @param x An object returned by [principal_components()] or [factor_analysis()].
810
#' @param n_items Number of required (i.e. non-missing) items to build the sum
911
#' score for an observation. If an observation has more missing values than
1012
#' `n_items` in all items of a (sub) scale, `NA` is returned for that
@@ -19,15 +21,18 @@
1921
#' @details
2022
#' `get_scores()` takes the results from [`principal_components()`] or
2123
#' [`factor_analysis()`] and extracts the variables for each component found by
22-
#' the PCA. Then, for each of these "subscales", row means are calculated (which
23-
#' equals adding up the single items and dividing by the number of items). This
24-
#' results in a sum score for each component from the PCA, which is on the same
25-
#' scale as the original, single items that were used to compute the PCA.
24+
#' the PCA/FA. Then, for each of these "subscales", row means are calculated
25+
#' (which equals adding up the single items and dividing by the number of
26+
#' items). This results in a sum score for each component from the PCA/FA, which
27+
#' is on the same scale as the original, single items that were used to compute
28+
#' the PCA/FA.
2629
#'
2730
#' @return A data frame with subscales, which are average sum scores for all
28-
#' items from each component.
31+
#' items from each component or factor.
2932
#'
30-
#' @seealso [`principal_components()`]
33+
#' @seealso Functions to carry out a PCA ([`principal_components()`]) or
34+
#' a FA ([`factor_analysis()`]). [`factor_scores()`] extracts factor scores
35+
#' from an FA object.
3136
#'
3237
#' @examplesIf insight::check_if_installed("psych", quietly = TRUE)
3338
#' pca <- principal_components(mtcars[, 1:7], n = 2, rotation = "varimax")
@@ -596,11 +601,11 @@ sort.parameters_pca <- sort.parameters_efa
596601

597602
#' @rdname principal_components
598603
#' @export
599-
closest_component <- function(pca_results) {
600-
if ("closest_component" %in% names(attributes(pca_results))) {
601-
attributes(pca_results)$closest_component
604+
closest_component <- function(x) {
605+
if ("closest_component" %in% names(attributes(x))) {
606+
attributes(x)$closest_component
602607
} else {
603-
.closest_component(pca_results)
608+
.closest_component(x)
604609
}
605610
}
606611

man/get_scores.Rd

Lines changed: 14 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)