Skip to content

Commit f9963ca

Browse files
authored
Merge pull request video-db#25 from video-db/ankit/add-transcript-params
Ankit/add transcript params
2 parents b947eff + c32c144 commit f9963ca

File tree

4 files changed

+55
-8
lines changed

4 files changed

+55
-8
lines changed

videodb/__about__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
""" About information for videodb sdk"""
22

33

4-
__version__ = "0.2.3"
4+
__version__ = "0.2.4"
55
__title__ = "videodb"
66
__author__ = "videodb"
77
__email__ = "contact@videodb.io"

videodb/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
SceneExtractionType,
1212
MediaType,
1313
SearchType,
14+
Segmenter,
1415
SubtitleAlignment,
1516
SubtitleBorderStyle,
1617
SubtitleStyle,
@@ -41,6 +42,7 @@
4142
"SubtitleStyle",
4243
"TextStyle",
4344
"SceneExtractionType",
45+
"Segmenter",
4446
]
4547

4648

videodb/_constants.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@ class SemanticSearchDefaultValues:
3636
score_threshold = 0.2
3737

3838

39+
class Segmenter:
40+
time = "time"
41+
word = "word"
42+
sentence = "sentence"
43+
44+
3945
class ApiPath:
4046
collection = "collection"
4147
upload = "upload"

videodb/video.py

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
IndexType,
66
SceneExtractionType,
77
SearchType,
8+
Segmenter,
89
SubtitleStyle,
910
Workflows,
1011
)
@@ -124,23 +125,61 @@ def get_thumbnails(self) -> List[Image]:
124125
)
125126
return [Image(self._connection, **thumbnail) for thumbnail in thumbnails_data]
126127

127-
def _fetch_transcript(self, force: bool = False) -> None:
128-
if self.transcript and not force:
128+
def _fetch_transcript(
129+
self,
130+
start: int = None,
131+
end: int = None,
132+
segmenter: str = Segmenter.word,
133+
length: int = 1,
134+
force: bool = None,
135+
) -> None:
136+
if (
137+
self.transcript
138+
and not start
139+
and not end
140+
and not segmenter
141+
and not length
142+
and not force
143+
):
129144
return
130145
transcript_data = self._connection.get(
131146
path=f"{ApiPath.video}/{self.id}/{ApiPath.transcription}",
132-
params={"force": "true" if force else "false"},
147+
params={
148+
"start": start,
149+
"end": end,
150+
"segmenter": segmenter,
151+
"length": length,
152+
"force": "true" if force else "false",
153+
},
133154
show_progress=True,
134155
)
135156
self.transcript = transcript_data.get("word_timestamps", [])
136157
self.transcript_text = transcript_data.get("text", "")
137158

138-
def get_transcript(self, force: bool = False) -> List[Dict]:
139-
self._fetch_transcript(force)
159+
def get_transcript(
160+
self,
161+
start: int = None,
162+
end: int = None,
163+
segmenter: str = Segmenter.word,
164+
length: int = 1,
165+
force: bool = None,
166+
) -> List[Dict]:
167+
self._fetch_transcript(
168+
start=start, end=end, segmenter=segmenter, length=length, force=force
169+
)
140170
return self.transcript
141171

142-
def get_transcript_text(self, force: bool = False) -> str:
143-
self._fetch_transcript(force)
172+
def get_transcript_text(
173+
self,
174+
start: int = None,
175+
end: int = None,
176+
segmenter: str = Segmenter.word,
177+
length: int = 1,
178+
force: bool = None,
179+
) -> str:
180+
self._fetch_transcript(
181+
start=start, end=end, segmenter=segmenter, length=length, force=force
182+
)
144183
return self.transcript_text
145184

146185
def index_spoken_words(

0 commit comments

Comments
 (0)