From b26ea9aa4ddabb6020f1c04be377fe351f8524dc Mon Sep 17 00:00:00 2001 From: Matt Karikomi Date: Sat, 30 May 2020 16:23:10 -0700 Subject: [PATCH] removed Plots and UMAP, removed Manifest --- .gitignore | 2 ++ Manifest.toml | 87 ---------------------------------------------- Project.toml | 2 -- examples/LDA.jl | 16 ++++----- src/TopicModels.jl | 4 +-- test/runtests.jl | 5 +++ 6 files changed, 15 insertions(+), 101 deletions(-) delete mode 100644 Manifest.toml create mode 100644 test/runtests.jl diff --git a/.gitignore b/.gitignore index e43b0f9..666e942 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ .DS_Store +*.jld +Manifest.toml diff --git a/Manifest.toml b/Manifest.toml deleted file mode 100644 index cf7317f..0000000 --- a/Manifest.toml +++ /dev/null @@ -1,87 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -[[SHA]] -uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" - -[[Serialization]] -uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" - -[[Sockets]] -uuid = "6462fe0b-24de-5631-8697-dd941f90decc" - -[[SortingAlgorithms]] -deps = ["DataStructures", "Random", "Test"] -git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd" -uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" -version = "0.3.1" - -[[SparseArrays]] -deps = ["LinearAlgebra", "Random"] -uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" - -[[SpecialFunctions]] -deps = ["OpenSpecFun_jll"] -git-tree-sha1 = "e19b98acb182567bcb7b75bb5d9eedf3a3b5ec6c" -uuid = "276daf66-3868-5448-9aa4-cd146d93841b" -version = "0.10.0" - -[[Statistics]] -deps = ["LinearAlgebra", "SparseArrays"] -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" - -[[StatsFuns]] -deps = ["Rmath", "SpecialFunctions"] -git-tree-sha1 = "f290ddd5fdedeadd10e961eb3f4d3340f09d030a" -uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -version = "0.9.4" - -[[SuiteSparse]] -deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] -uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" - -[[Test]] -deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] -uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[[UUIDs]] -deps = ["Random", "SHA"] -uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" - -[[Unicode]] -uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - -[[Zlib_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "fd36a6739e256527287c5444960d0266712cd49e" -uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.11+8" - -[[libass_jll]] -deps = ["Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "Libdl", "Pkg", "Zlib_jll"] -git-tree-sha1 = "3fd3ea3525f2e3d337c54a52b2ca78a5a272bbf5" -uuid = "0ac62f75-1d6f-5e53-bd7c-93b484bb37c0" -version = "0.14.0+0" - -[[libfdk_aac_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "0e4ace600c20714a8dd67700c4502714d8473e8e" -uuid = "f638f0a6-7fb0-5443-88ba-1cc74229b280" -version = "0.1.6+1" - -[[libvorbis_jll]] -deps = ["Libdl", "Ogg_jll", "Pkg"] -git-tree-sha1 = "71e54fb89ac3e0344c7185d1876fd96b0f246952" -uuid = "f27f6e37-5d2b-51aa-960f-b287f2bc3b7a" -version = "1.3.6+2" - -[[x264_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "23664c0757c3740050ca0e22944c786c165ca25a" -uuid = "1270edf5-f2f9-52d2-97e9-ab00b5d0237a" -version = "2019.5.25+1" - -[[x265_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "9345e417084421a8e91373d6196bc58e660eed2a" -uuid = "dfaa095f-4041-5dcd-9319-2fabd8486b76" -version = "3.0.0+0" diff --git a/Project.toml b/Project.toml index a3f7a69..9a70294 100644 --- a/Project.toml +++ b/Project.toml @@ -6,10 +6,8 @@ version = "0.1.0" [deps] Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" -UMAP = "c4f8c510-2410-5be4-91d7-4fbaeb39457e" [compat] julia = "1.3" diff --git a/examples/LDA.jl b/examples/LDA.jl index 32027fc..8d1f1a1 100644 --- a/examples/LDA.jl +++ b/examples/LDA.jl @@ -1,4 +1,4 @@ -using TopicModels, Plots, UMAP +using TopicModels ################################################################################################################################## # Fit and Visualize Real-World Text Data @@ -17,9 +17,10 @@ state = State(model,corpus) topWords = topTopicWords(model, state, 10) # visualize the fit -@time embedding = umap(state.topics, 2, n_neighbors=10) -maxlabels = vec(map(i->i[1], findmax(state.topics,dims=1)[2])) -scatter(embedding[1,:], embedding[2,:], zcolor=maxlabels, title="UMAP: Max-Likelihood Doc Topics on Learned", marker=(2, 2, :auto, stroke(0))) +# using Plots, UMAP +# @time embedding = umap(state.topics, 2, n_neighbors=10) +# maxlabels = vec(map(i->i[1], findmax(state.topics,dims=1)[2])) +# scatter(embedding[1,:], embedding[2,:], zcolor=maxlabels, title="UMAP: Max-Likelihood Doc Topics on Learned", marker=(2, 2, :auto, stroke(0))) ################################################################################################################################## # Fit, Validate, and Visualize Synthetic Data Derived from a Fully-Generative Simulation (Poisson-distributed document-length) @@ -34,13 +35,8 @@ testCorpus = LdaCorpus(k, lexLength, corpLambda, corpLength, scaleK, scaleL) testModel = Model(testCorpus.alpha, testCorpus.beta, testCorpus) testState = State(testModel, testCorpus) -@time trainModel(testModel, testState, 100) + @time trainModel(testModel, testState, 100) # compute validation metrics on a single fit CorpusARI(testState,testModel,testCorpus) # ARI for max. likelihood. document topics DocsARI(testState,testCorpus) # ARI for actual word topics - -# visualize the fit -@time embedding = umap(testState.topics, 2;n_neighbors=10) -maxlabels = vec(map(i->i[1], findmax(CorpusTopics(testCorpus),dims=1)[2])) -scatter(embedding[1,:], embedding[2,:], zcolor=maxlabels, title="UMAP: True on Learned", marker=(2, 2, :auto, stroke(0))) diff --git a/src/TopicModels.jl b/src/TopicModels.jl index 3c6c9e9..b26d005 100644 --- a/src/TopicModels.jl +++ b/src/TopicModels.jl @@ -3,7 +3,7 @@ module TopicModels #Imports import Base.length -using Random, Distributions, Plots, UMAP +using Random, Distributions using SpecialFunctions: loggamma using Clustering: randindex @@ -29,4 +29,4 @@ include("Computation.jl") #Stuff like perplexity and ARI: include("Validation.jl") -end #module \ No newline at end of file +end #module diff --git a/test/runtests.jl b/test/runtests.jl new file mode 100644 index 0000000..3ccdc76 --- /dev/null +++ b/test/runtests.jl @@ -0,0 +1,5 @@ +using Test + +@testset "Inference" begin + include("Gibbs_unit_tests.jl") +end