Skip to content

Commit 316a1c6

Browse files
montanalow (Montana Low) authored
freeze all requirements and document the project requirements (#1236)
Co-authored-by: Montana Low <montanalow@gmail.com>
1 parent 3a94291 commit 316a1c6

File tree

9 files changed

+158
-56
lines changed

9 files changed

+158
-56
lines changed

packages/postgresml-python/build.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ rm "$deb_dir/release.sh"
2929
(cat ${SCRIPT_DIR}/DEBIAN/postrm | envsubst '${PGVERSION}') > "$deb_dir/DEBIAN/postrm"
3030

3131
cp ${SCRIPT_DIR}/../../pgml-extension/requirements.txt "$deb_dir/etc/postgresml-python/requirements.txt"
32-
cp ${SCRIPT_DIR}/../../pgml-extension/requirements-autogptq.txt "$deb_dir/etc/postgresml-python/requirements-autogptq.txt"
33-
cp ${SCRIPT_DIR}/../../pgml-extension/requirements-xformers.txt "$deb_dir/etc/postgresml-python/requirements-xformers.txt"
3432

3533
virtualenv --python="python$PYTHON_VERSION" "$deb_dir/var/lib/postgresml-python/pgml-venv"
3634
source "$deb_dir/var/lib/postgresml-python/pgml-venv/bin/activate"

pgml-cms/docs/resources/developer-docs/installation.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,7 @@ To install the necessary Python packages into a virtual environment, use the `vi
6363
```bash
6464
virtualenv pgml-venv && \
6565
source pgml-venv/bin/activate && \
66-
pip install -r requirements.txt && \
67-
pip install -r requirements-xformers.txt --no-dependencies
66+
pip install -r requirements.txt
6867
```
6968
{% endtab %}
7069

pgml-extension/examples/multi_classification.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ LIMIT 10;
3131

3232
-- linear models
3333
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'ridge');
34-
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'stochastic_gradient_descent');
35-
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'perceptron');
36-
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'passive_aggressive');
34+
--SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'stochastic_gradient_descent');
35+
--SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'perceptron');
36+
--SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'passive_aggressive');
3737

3838
-- support vector machines
3939
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'svm');

pgml-extension/examples/transformers.sql

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,19 @@ SELECT pgml.embed('intfloat/e5-small', 'hi mom', '{"device": "cpu"}');
88

99
SELECT pgml.embed('hkunlp/instructor-xl', 'hi mom', '{"instruction": "Encode it with love"}');
1010

11+
SELECT pgml.transform_stream(
12+
task => '{
13+
"task": "text-generation",
14+
"model": "TheBloke/zephyr-7B-beta-GPTQ",
15+
"model_type": "mistral",
16+
"revision": "main",
17+
"device_map": "auto"
18+
}'::JSONB,
19+
input => 'AI is going to',
20+
args => '{
21+
"max_new_tokens": 100
22+
}'::JSONB
23+
);
1124
-- BitsAndBytes support
1225
SELECT pgml.transform(
1326
task => '{

pgml-extension/requirements-autogptq.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

pgml-extension/requirements-xformers.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

pgml-extension/requirements.base.txt

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# The immediate dependencies of PostgresML are maintained here.
2+
3+
# Locked because newer versions have bugs
4+
transformers-stream-generator==0.0.4
5+
optimum==1.13.2
6+
peft==0.6.2
7+
pyarrow==11.0.0
8+
torch==2.0.1 # 2.1.1 breaks sentence-transformers==2.2.2
9+
10+
# ML
11+
catboost
12+
lightgbm
13+
torchaudio
14+
torchvision
15+
xgboost
16+
17+
# Transformers
18+
accelerate
19+
auto-gptq; sys_platform == 'linux'
20+
bitsandbytes
21+
ctransformers
22+
huggingface-hub
23+
deepspeed
24+
einops
25+
tokenizers
26+
transformers
27+
xformers; sys_platform == 'linux'
28+
29+
# Embeddings
30+
InstructorEmbedding
31+
sentence-transformers
32+
33+
# Ratings
34+
rouge
35+
sacrebleu
36+
sacremoses
37+
38+
# Utils
39+
datasets
40+
orjson
41+
langchain

pgml-extension/requirements.txt

Lines changed: 91 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,101 @@
1-
accelerate==0.22.0
2-
bitsandbytes==0.41.1
3-
catboost==1.2
1+
accelerate==0.25.0
2+
aiohttp==3.9.1
3+
aiosignal==1.3.1
4+
annotated-types==0.6.0
5+
anyio==4.1.0
6+
attrs==23.1.0
7+
bitsandbytes==0.41.3.post2
8+
catboost==1.2.2
9+
certifi==2023.11.17
10+
charset-normalizer==3.3.2
11+
click==8.1.7
12+
colorama==0.4.6
13+
contourpy==1.2.0
414
ctransformers==0.2.27
5-
datasets==2.14.5
6-
deepspeed==0.10.3
7-
huggingface-hub==0.17.1
15+
cycler==0.12.1
16+
dataclasses-json==0.6.3
17+
datasets==2.15.0
18+
deepspeed==0.12.4
19+
dill==0.3.7
20+
einops==0.7.0
21+
filelock==3.13.1
22+
fonttools==4.46.0
23+
frozenlist==1.4.0
24+
fsspec==2023.10.0
25+
graphviz==0.20.1
26+
hjson==3.1.0
27+
huggingface-hub==0.19.4
28+
idna==3.6
829
InstructorEmbedding==1.0.1
30+
Jinja2==3.1.2
31+
joblib==1.3.2
32+
jsonpatch==1.33
33+
jsonpointer==2.4
34+
kiwisolver==1.4.5
35+
langchain==0.0.349
36+
langchain-community==0.0.1
37+
langchain-core==0.0.13
38+
langsmith==0.0.69
939
lightgbm==4.1.0
10-
orjson==3.9.7
11-
pandas==2.1.0
12-
rich==13.5.2
40+
lxml==4.9.3
41+
MarkupSafe==2.1.3
42+
marshmallow==3.20.1
43+
matplotlib==3.8.2
44+
mpmath==1.3.0
45+
multidict==6.0.4
46+
multiprocess==0.70.15
47+
mypy-extensions==1.0.0
48+
networkx==3.2.1
49+
ninja==1.11.1.1
50+
nltk==3.8.1
51+
numpy==1.26.2
52+
optimum==1.13.2
53+
orjson==3.9.10
54+
packaging==23.2
55+
pandas==2.1.4
56+
peft==0.6.2
57+
Pillow==10.1.0
58+
plotly==5.18.0
59+
portalocker==2.8.2
60+
psutil==5.9.6
61+
py-cpuinfo==9.0.0
62+
pyarrow==14.0.1
63+
pyarrow-hotfix==0.6
64+
pydantic==2.5.2
65+
pydantic_core==2.14.5
66+
pynvml==11.5.0
67+
pyparsing==3.1.1
68+
python-dateutil==2.8.2
69+
pytz==2023.3.post1
70+
PyYAML==6.0.1
71+
regex==2023.10.3
72+
requests==2.31.0
1373
rouge==1.0.1
14-
sacrebleu==2.3.1
15-
sacremoses==0.0.53
16-
scikit-learn==1.3.0
17-
sentencepiece==0.1.99
74+
sacrebleu==2.3.3
75+
sacremoses==0.1.1
76+
safetensors==0.4.1
77+
scikit-learn==1.3.2
78+
scipy==1.11.4
1879
sentence-transformers==2.2.2
19-
tokenizers==0.14.1
80+
sentencepiece==0.1.99
81+
six==1.16.0
82+
sniffio==1.3.0
83+
SQLAlchemy==2.0.23
84+
sympy==1.12
85+
tabulate==0.9.0
86+
tenacity==8.2.3
87+
threadpoolctl==3.2.0
88+
tokenizers==0.15.0
2089
torch==2.0.1
2190
torchaudio==2.0.2
2291
torchvision==0.15.2
2392
tqdm==4.66.1
24-
transformers==4.34.1
25-
xgboost==2.0.0
26-
langchain==0.0.287
27-
einops==0.6.1
28-
pynvml==11.5.0
93+
transformers==4.36.0
2994
transformers-stream-generator==0.0.4
30-
optimum==1.13.2
31-
peft==0.6.2
32-
pyarrow==11.0.0
95+
typing-inspect==0.9.0
96+
typing_extensions==4.9.0
97+
tzdata==2023.3
98+
urllib3==2.1.0
99+
xgboost==2.0.2
100+
xxhash==3.4.1
101+
yarl==1.9.4

pgml-extension/src/bindings/transformers/transformers.py

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import shutil
44
import time
55
import queue
6-
import sys
76

87
import datasets
98
from InstructorEmbedding import INSTRUCTOR
@@ -42,7 +41,6 @@
4241
Trainer,
4342
)
4443
from threading import Thread
45-
from typing import Optional
4644

4745
__cache_transformer_by_model_id = {}
4846
__cache_sentence_transformer_by_name = {}
@@ -393,42 +391,28 @@ def transform(task, args, inputs, stream=False):
393391
return orjson.dumps(pipe(inputs, **args), default=orjson_default).decode()
394392

395393

396-
def create_embedding(transformer):
394+
def embed(transformer, inputs, kwargs):
395+
kwargs = orjson.loads(kwargs)
396+
ensure_device(kwargs)
397397
instructor = transformer.startswith("hkunlp/instructor")
398-
klass = INSTRUCTOR if instructor else SentenceTransformer
399-
return klass(transformer)
400398

399+
# Cache the model
400+
if transformer not in __cache_sentence_transformer_by_name:
401+
klass = INSTRUCTOR if instructor else SentenceTransformer
402+
__cache_sentence_transformer_by_name[transformer] = klass(transformer)
403+
model = __cache_sentence_transformer_by_name[transformer]
401404

402-
def embed_using(model, transformer, inputs, kwargs):
403-
if isinstance(kwargs, str):
404-
kwargs = orjson.loads(kwargs)
405-
406-
instructor = transformer.startswith("hkunlp/instructor")
405+
# Handle instruction encoding
407406
if instructor:
408407
texts_with_instructions = []
409408
instruction = kwargs.pop("instruction")
410409
for text in inputs:
411410
texts_with_instructions.append([instruction, text])
412-
413411
inputs = texts_with_instructions
414412

415413
return model.encode(inputs, **kwargs)
416414

417415

418-
def embed(transformer, inputs, kwargs):
419-
kwargs = orjson.loads(kwargs)
420-
421-
ensure_device(kwargs)
422-
423-
if transformer not in __cache_sentence_transformer_by_name:
424-
__cache_sentence_transformer_by_name[transformer] = create_embedding(
425-
transformer
426-
)
427-
model = __cache_sentence_transformer_by_name[transformer]
428-
429-
return embed_using(model, transformer, inputs, kwargs)
430-
431-
432416
def clear_gpu_cache(memory_usage: None):
433417
if not torch.cuda.is_available():
434418
raise PgMLException(f"No GPU available")

0 commit comments

Comments (0)