Commit
Add missing files from earlier commit: mostly datasets, and updated typos/cleaned up some comments
artemisp committed Dec 6, 2023
1 parent 1f3253b commit 71a2da9
Showing 348 changed files with 2,014 additions and 567 deletions.
15 changes: 14 additions & 1 deletion lavis/common/utils.py
@@ -1,5 +1,5 @@
"""
Copyright (c) 2022, salesforce.com, inc.
Copyright (c) 2023, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
@@ -440,3 +440,16 @@ def get_file_size(filename):
"""
size_in_mb = os.path.getsize(filename) / float(1024**2)
return size_in_mb

def is_serializable(value):
"""
This function checks if the provided value can be serialized into a JSON string.
"""
try:
json.dumps(value)
return True
except (TypeError, OverflowError):
return False

def is_convertible_to_int(value):
    # True if str(value) is an (optionally signed) integer literal, e.g. "42" or "-7".
    return bool(re.match(r'^-?\d+$', str(value)))
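
As a usage sketch (not part of the commit; clean_for_logging and the sample dict are illustrative), the two helpers can be combined to sanitize a config before writing it to a JSON log:

    from lavis.common.utils import is_serializable, is_convertible_to_int

    def clean_for_logging(cfg):
        # Keep only entries that can be written to a JSON log; cast integer-like strings to int.
        cleaned = {}
        for key, value in cfg.items():
            if not is_serializable(value):
                continue
            cleaned[key] = int(value) if is_convertible_to_int(value) else value
        return cleaned

    print(clean_for_logging({"lr": "3", "model": "vicuna7b", "callback": print}))
    # -> {'lr': 3, 'model': 'vicuna7b'}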
4 changes: 2 additions & 2 deletions lavis/configs/datasets/audioset/defaults_mm_cap_instruct.yaml
@@ -28,8 +28,8 @@ datasets:
annotations:
train:
url:
- https://storage.googleapis.com/sfr-xinstructblip-data-research/data//audioset/balanced_train_clean.csv
# - /export/home/LAVIS-xgen_mm/lavis/configs/datasets/audioset/balanced_train_clean.csv
# - https://storage.googleapis.com/sfr-xinstructblip-data-research/data//audioset/balanced_train_clean.csv
- /export/home/LAVIS-xgen_mm/lavis/configs/datasets/audioset/balanced_train_clean.csv
- http://storage.googleapis.com/us_audioset/youtube_corpus/v1/csv/class_labels_indices.csv
storage:
- audioset/annotations/balanced_train_clean.csv
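
For orientation, a small sketch (not part of the commit): relative storage entries like the one above are resolved against the LAVIS cache root via the existing get_cache_path helper in lavis/common/utils.py, which the builders use when locating annotations.

    from lavis.common.utils import get_cache_path

    # Prints <cache_root>/audioset/annotations/balanced_train_clean.csv,
    # where the cache root comes from the LAVIS runtime configuration.
    print(get_cache_path("audioset/annotations/balanced_train_clean.csv"))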
28 changes: 14 additions & 14 deletions lavis/configs/datasets/iconqa/defaults_instruct.yaml
@@ -35,20 +35,20 @@ datasets:
storage:
- iconqa/annotations/train.json
# - /export/share/datasets/vision_language/iconqa/annotations_train.json
val:
url:
- https://storage.googleapis.com/sfr-xinstructblip-data-research/data/iconqa/annotations_val.json
# - /export/share/datasets/vision_language/iconqa/annotations_val.json
storage:
- iconqa/annotations/val.json
# - /export/share/datasets/vision_language/iconqa/annotations_val.json
test:
url:
- https://storage.googleapis.com/sfr-xinstructblip-data-research/data/iconqa/annotations_test.json
# - /export/share/datasets/vision_language/iconqa/annotations_test.json
storage:
- iconqa/annotations/test.json
# - /export/share/datasets/vision_language/iconqa/annotations_test.json
# val:
# url:
# - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/iconqa/annotations_val.json
# # - /export/share/datasets/vision_language/iconqa/annotations_val.json
# storage:
# - iconqa/annotations/val.json
# # - /export/share/datasets/vision_language/iconqa/annotations_val.json
# test:
# url:
# - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/iconqa/annotations_test.json
# # - /export/share/datasets/vision_language/iconqa/annotations_test.json
# storage:
# - iconqa/annotations/test.json
# # - /export/share/datasets/vision_language/iconqa/annotations_test.json

images:
storage: /export/share/datasets/vision_language/iconqa/all_images/
6 changes: 3 additions & 3 deletions lavis/configs/models/blip2/blip2_xinstruct_vicuna7b.yaml
@@ -15,9 +15,9 @@ model:
pc_model: "ulip2_pointbert"
video_model: "eva_clip_g"
audio_model: "beats"
pretrained_image_qformer: https://storage.googleapis.com/sfr-xinstructblip-data-research/model/xinstructblip_checkpoints/vicuna7b/image_qformer_improved.pth
pretrained_pc_qformer: https://storage.googleapis.com/sfr-xinstructblip-data-research/model/xinstructblip_checkpoints/vicuna7b/pc_qformer_improved.pth
pretrained_video_qformer: https://storage.googleapis.com/sfr-xinstructblip-data-research/model/xinstructblip_checkpoints/vicuna7b/video_qformer_improved.pth
pretrained_image_qformer: https://storage.googleapis.com/sfr-xinstructblip-data-research/model/xinstructblip_checkpoints/vicuna7b/image_qformer.pth
pretrained_pc_qformer: https://storage.googleapis.com/sfr-xinstructblip-data-research/model/xinstructblip_checkpoints/vicuna7b/pc_qformer.pth
pretrained_video_qformer: https://storage.googleapis.com/sfr-xinstructblip-data-research/model/xinstructblip_checkpoints/vicuna7b/video_qformer.pth
pretrained_audio_qformer: https://storage.googleapis.com/sfr-xinstructblip-data-research/model/xinstructblip_checkpoints/vicuna7b/audio_qformer_improved.pth
load_attention_image_qformer: True
load_attention_pc_qformer: True
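
As a quick sketch of working with this model config (not part of the commit; the override path is illustrative), the file can be loaded and patched with OmegaConf, which LAVIS already uses for its configs:

    from omegaconf import OmegaConf

    # Path is relative to the repository root.
    cfg = OmegaConf.load("lavis/configs/models/blip2/blip2_xinstruct_vicuna7b.yaml")
    print(cfg.model.pretrained_image_qformer)

    # Swap in a local checkpoint instead of the hosted one (local path is illustrative).
    local = OmegaConf.create(
        {"model": {"pretrained_image_qformer": "/export/checkpoints/image_qformer.pth"}}
    )
    cfg = OmegaConf.merge(cfg, local)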
91 changes: 88 additions & 3 deletions lavis/datasets/builders/base_dataset_builder.py
Expand Up @@ -235,8 +235,93 @@ def build(self):
return datasets


class MultiModalDatasetBuilder(BaseDatasetBuilder):
"""
MultiModalDatasetBuilder is a utility class designed to construct datasets
suitable for multi-modal tasks. This class simplifies the creation of
datasets that incorporate data of multiple modalities, such as text,
images, video, or audio.
"""
train_dataset_cls, eval_dataset_cls = None, None

def __init__(self, cfg=None):
super().__init__(cfg)
if isinstance(self.data_type, str):
self.data_type = [self.data_type]

def _build_processor(self, cfg_name):
cfg = self.config.get(cfg_name)
return {
split: self._build_proc_from_cfg(cfg.get(split))
if cfg is not None
else None
for split in ['train', 'eval']
}

def build_processors(self):
self.text_processors = self._build_processor("text_processor")

self.processors = {
split: {
modality: self._build_proc_from_cfg(
self.config.get(f"{'vis' if 'image' in modality else modality}_processor").get(split)
)
for modality in self.data_type
}
for split in ['train', 'eval']
}

def _download_multimodal(self, modality):
storage_path = utils.get_cache_path(self.config.build_info.get(modality).storage)
if not os.path.exists(storage_path):
warnings.warn(f"The specified path {storage_path} for {modality} inputs does not exist.")

def _download_data(self):
self._download_ann()
for modality in self.data_type:
self._download_multimodal(modality)

def _get_absolute_path(self, path):
if not os.path.isabs(path):
return utils.get_cache_path(path)
return path

def build(self):
self.build_processors()
build_info = self.config.build_info
datasets = {}

for split, info in build_info.annotations.items():
if split not in ["train", "val", "test"]:
continue

is_train = split == "train"
dataset_args = self._get_dataset_args(info, is_train)

dataset_cls = self.train_dataset_cls if is_train else self.eval_dataset_cls
datasets[split] = dataset_cls(**dataset_args)

return datasets

def _get_dataset_args(self, info, is_train):
dataset_args = dict(self.config.build_info.get('kwargs', {}))

for modality in self.data_type:
proc_name = f"{'vis' if 'image' in modality else modality}_processor"
dataset_args[proc_name] = self.processors["train" if is_train else "eval"][modality]
mm_path = self._get_absolute_path(self.config.build_info.get(modality).storage)
dataset_args[f"{'vis' if 'image' in modality else modality}_root"] = mm_path

dataset_args['text_processor'] = self.text_processors["train" if is_train else "eval"]
dataset_args["ann_paths"] = [self._get_absolute_path(path) for path in info.storage]
dataset_args['modalities'] = self.data_type

# Conform to base
for key in ['vis_processor', 'vis_root', 'test_processor']:
dataset_args.setdefault(key, None)

return dataset_args

def load_dataset_config(cfg_path):
cfg = OmegaConf.load(cfg_path).datasets
cfg = cfg[list(cfg.keys())[0]]

return cfg
return next(iter(cfg.values()))
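
A minimal sketch of how a concrete builder might subclass the new class (not code from this commit; the stand-in dataset class, registry key, and reuse of the audioset config are assumptions):

    from lavis.common.registry import registry
    from lavis.datasets.builders.base_dataset_builder import MultiModalDatasetBuilder

    class ToyAudioCapDataset:
        # Stand-in for a real dataset class; it only records the kwargs produced by _get_dataset_args.
        def __init__(self, **kwargs):
            self.ann_paths = kwargs.get("ann_paths", [])
            self.text_processor = kwargs.get("text_processor")
            self.audio_root = kwargs.get("audio_root")

    @registry.register_builder("toy_audio_caption")  # registry key is illustrative
    class ToyAudioCapBuilder(MultiModalDatasetBuilder):
        train_dataset_cls = ToyAudioCapDataset
        eval_dataset_cls = ToyAudioCapDataset
        DATASET_CONFIG_DICT = {
            # Reusing the audioset config from this commit; a custom yaml would work the same way.
            "default": "configs/datasets/audioset/defaults_mm_cap_instruct.yaml",
        }

    # datasets = ToyAudioCapBuilder().build_datasets()  # downloads/validates data, then returns {split: dataset}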
