Skip to content

v1.3 compat #8

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
188 changes: 188 additions & 0 deletions Manifest.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
# This file is machine-generated - editing it directly is not advised

[[Arpack]]
deps = ["Arpack_jll", "Libdl", "LinearAlgebra"]
git-tree-sha1 = "2ff92b71ba1747c5fdd541f8fc87736d82f40ec9"
uuid = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97"
version = "0.4.0"

[[Arpack_jll]]
deps = ["Libdl", "OpenBLAS_jll", "Pkg"]
git-tree-sha1 = "68a90a692ddc0eb72d69a6993ca26e2a923bf195"
uuid = "68821587-b530-5797-8361-c406ea357684"
version = "3.5.0+2"

[[Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"

[[BinaryProvider]]
deps = ["Libdl", "SHA"]
git-tree-sha1 = "5b08ed6036d9d3f0ee6369410b830f8873d4024c"
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
version = "0.5.8"

[[DataAPI]]
git-tree-sha1 = "674b67f344687a88310213ddfa8a2b3c76cc4252"
uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
version = "1.1.0"

[[DataStructures]]
deps = ["InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "5a431d46abf2ef2a4d5d00bd0ae61f651cf854c8"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.17.10"

[[Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"

[[Distributed]]
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"

[[Distributions]]
deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns"]
git-tree-sha1 = "6b19601c0e98de3a8964ed33ad73e130c7165b1d"
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
version = "0.22.4"

[[FillArrays]]
deps = ["LinearAlgebra", "Random", "SparseArrays"]
git-tree-sha1 = "85c6b57e2680fa28d5c8adc798967377646fbf66"
uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
version = "0.8.5"

[[InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"

[[LibGit2]]
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"

[[Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"

[[LinearAlgebra]]
deps = ["Libdl"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

[[Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"

[[Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"

[[Missings]]
deps = ["DataAPI"]
git-tree-sha1 = "de0a5ce9e5289f27df672ffabef4d1e5861247d5"
uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
version = "0.4.3"

[[OpenBLAS_jll]]
deps = ["Libdl", "Pkg"]
git-tree-sha1 = "e2551d7c25d52f35b76d86a50917a3ba8988f519"
uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
version = "0.3.7+5"

[[OpenSpecFun_jll]]
deps = ["Libdl", "Pkg"]
git-tree-sha1 = "65f672edebf3f4e613ddf37db9dcbd7a407e5e90"
uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
version = "0.5.3+1"

[[OrderedCollections]]
deps = ["Random", "Serialization", "Test"]
git-tree-sha1 = "c4c13474d23c60d20a67b217f1d7f22a40edf8f1"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.1.0"

[[PDMats]]
deps = ["Arpack", "LinearAlgebra", "SparseArrays", "SuiteSparse", "Test"]
git-tree-sha1 = "5f303510529486bb02ac4d70da8295da38302194"
uuid = "90014a1f-27ba-587c-ab20-58faa44d9150"
version = "0.9.11"

[[Pkg]]
deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Test", "UUIDs"]
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"

[[Printf]]
deps = ["Unicode"]
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"

[[QuadGK]]
deps = ["DataStructures", "LinearAlgebra"]
git-tree-sha1 = "dc84e810393cfc6294248c9032a9cdacc14a3db4"
uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
version = "2.3.1"

[[REPL]]
deps = ["InteractiveUtils", "Markdown", "Sockets"]
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"

[[Random]]
deps = ["Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

[[Rmath]]
deps = ["BinaryProvider", "Libdl", "Random", "Statistics"]
git-tree-sha1 = "2bbddcb984a1d08612d0c4abb5b4774883f6fa98"
uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa"
version = "0.6.0"

[[SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"

[[Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"

[[Sockets]]
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"

[[SortingAlgorithms]]
deps = ["DataStructures", "Random", "Test"]
git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd"
uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
version = "0.3.1"

[[SparseArrays]]
deps = ["LinearAlgebra", "Random"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[[SpecialFunctions]]
deps = ["OpenSpecFun_jll"]
git-tree-sha1 = "e19b98acb182567bcb7b75bb5d9eedf3a3b5ec6c"
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
version = "0.10.0"

[[Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[[StatsBase]]
deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"]
git-tree-sha1 = "be5c7d45daa449d12868f4466dbf5882242cf2d9"
uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
version = "0.32.1"

[[StatsFuns]]
deps = ["Rmath", "SpecialFunctions"]
git-tree-sha1 = "f290ddd5fdedeadd10e961eb3f4d3340f09d030a"
uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
version = "0.9.4"

[[SuiteSparse]]
deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"]
uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"

[[Test]]
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[[UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[[Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
18 changes: 18 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
name = "TopicModels"
uuid = "e9825ca3-3499-4c9b-97dc-a93734876e50"
authors = ["Jonathan Chang <slycoder@gmail.com>"]
version = "0.1.0"

[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"

[compat]
julia = "1.3"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
Empty file removed REQUIRE
Empty file.
34 changes: 17 additions & 17 deletions src/TopicModels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ module TopicModels

import Base.length

# RaggedMatrix{T} is an alias for a vector of vectors with element type `T`
# (`Array{Array{T,1},1}`). Nothing in the type constrains the inner vectors to
# share a length.
# The pre-1.0 `typealias` spelling is no longer valid syntax, so only the
# assignment form is kept; `const` keeps the global binding concretely typed.
const RaggedMatrix{T} = Array{Array{T,1},1}

type Corpus
struct Corpus
documents::RaggedMatrix{Int64}
weights::RaggedMatrix{Float64}

Expand All @@ -15,7 +15,7 @@ type Corpus
weights
)
end

Corpus(documents::RaggedMatrix{Int64}) = begin
weights = map(documents) do doc
ones(Float64, length(doc))
Expand All @@ -27,7 +27,7 @@ type Corpus
end
end

type Model
struct Model
alphaPrior::Vector{Float64}
betaPrior::Float64
topics::Array{Float64,2}
Expand All @@ -37,9 +37,9 @@ type Model
frozen::Bool
corpus::Corpus

Model(alphaPrior::Vector{Float64},
betaPrior::Float64,
V::Int64,
Model(alphaPrior::Vector{Float64},
betaPrior::Float64,
V::Int64,
corpus::Corpus) = begin
K = length(alphaPrior)
m = new(
Expand All @@ -48,7 +48,7 @@ type Model
zeros(Float64, K, V), # topics
zeros(Float64, K), # topicSums
zeros(Float64, K, length(corpus.documents)), #documentSums
fill(Array(Int64, 0), length(corpus.documents)), # assignments
Array{Array{Int64,1},1}(undef,length(corpus.documents)), # assignments
false,
corpus
)
Expand Down Expand Up @@ -111,8 +111,8 @@ function wordDistribution(word::Int,
out::Vector{Float64})
V = size(model.topics, 2)
for ii in 1:length(out)
u = (model.documentSums[ii, document] + model.alphaPrior[ii]) *
(model.topics[ii, word] + model.betaPrior) /
u = (model.documentSums[ii, document] + model.alphaPrior[ii]) *
(model.topics[ii, word] + model.betaPrior) /
(model.topicSums[ii] + V * model.betaPrior)
@inbounds out[ii] = u
end
Expand All @@ -128,10 +128,10 @@ function sampleWord(word::Int,
end


function updateSufficientStatistics(word::Int64,
function updateSufficientStatistics(word::Int64,
topic::Int64,
document::Int64,
scale::Float64,
scale::Float64,
model::Model)
fr = Float64(!model.frozen)
@inbounds model.documentSums[topic, document] += scale
Expand All @@ -146,7 +146,7 @@ function sampleDocument(document::Int,
Nw = length(words)
@inbounds weights = model.corpus.weights[document]
K = length(model.alphaPrior)
p = Array(Float64, K)
p = Array{Float64,1}(undef,K)
@inbounds assignments = model.assignments[document]
for ii in 1:Nw
@inbounds word = words[ii]
Expand All @@ -170,10 +170,10 @@ end
"""
    termToWordSequence(term::AbstractString)

Expand a `"<id>:<count>"` term into a vector of `Int64`.

The text before the first `:` is parsed as an integer id and the text after it
as an integer count. The result contains the value `id + 1` repeated `count`
times (an empty vector when `count` is zero).
"""
function termToWordSequence(term::AbstractString)
    parts = split(term, ":")
    # NOTE(review): the `+ 1` presumably converts a 0-based id from the input
    # format into a 1-based Julia index — confirm against the corpus format.
    wordIndex = parse(Int64, parts[1]) + 1
    repetitions = parse(Int64, parts[2])
    fill(wordIndex, repetitions)
end

# The functions below are designed for public consumption
function trainModel(model::Model,
function trainModel(model::Model,
numIterations::Int64)
for ii in 1:numIterations
println(string("Iteration ", ii, "..."))
Expand All @@ -183,7 +183,7 @@ function trainModel(model::Model,
end

function topTopicWords(model::Model,
lexicon::Array{ASCIIString,1},
lexicon::Array{String,1},
numWords::Int64)
[lexicon[reverse(sortperm(model.topics'[1:end, row]))[1:numWords]]
for row in 1:size(model.topics,1)]
Expand All @@ -198,7 +198,7 @@ end

"""
    readLexicon(stream)

Read all lines from `stream` and return them as an `Array{String,1}`, one
entry per line, with any trailing line terminator removed.
"""
function readLexicon(stream)
    # `chomp` returns `SubString`s; the typed comprehension converts each one
    # back to `String`, so callers receive a plain `Array{String,1}`.
    # The earlier untyped `map(chomp, ...)` pass was computed and discarded,
    # so it is dropped here.
    String[chomp(line) for line in readlines(stream)]
end

export Corpus,
Expand Down