diff --git a/samples/rest/text_generation.sh b/samples/rest/text_generation.sh new file mode 100644 index 000000000..fc2d7b9a0 --- /dev/null +++ b/samples/rest/text_generation.sh @@ -0,0 +1,247 @@ +set -eu + +SCRIPT_DIR=$(dirname "$0") +MEDIA_DIR=$(realpath ${SCRIPT_DIR}/../../third_party) + +IMG_PATH=${MEDIA_DIR}/organ.jpg +AUDIO_PATH=${MEDIA_DIR}/sample.mp3 +VIDEO_PATH=${MEDIA_DIR}/Big_Buck_Bunny.mp4 + +BASE_URL="https://generativelanguage.googleapis.com" + +if [[ "$(base64 --version 2>&1)" = *"FreeBSD"* ]]; then + B64FLAGS="--input" +else + B64FLAGS="-w0" +fi + +echo "[START text_gen_text_only_prompt]" +# [START text_gen_text_only_prompt] +curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \ + -H 'Content-Type: application/json' \ + -X POST \ + -d '{ + "contents": [{ + "parts":[{"text": "Write a story about a magic backpack."}] + }] + }' 2> /dev/null +# [END text_gen_text_only_prompt] + +echo "[START text_gen_text_only_prompt_streaming]" +# [START text_gen_text_only_prompt_streaming] +curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=${GOOGLE_API_KEY}" \ + -H 'Content-Type: application/json' \ + --no-buffer \ + -d '{ "contents":[{"parts":[{"text": "Write a story about a magic backpack."}]}]}' +# [END text_gen_text_only_prompt_streaming] + +echo "[START text_gen_multimodal_one_image_prompt]" +# [START text_gen_multimodal_one_image_prompt] +curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \ + -H 'Content-Type: application/json' \ + -X POST \ + -d '{ + "contents": [{ + "parts":[ + {"text": "Tell me about this instrument"}, + { + "inline_data": { + "mime_type":"image/jpeg", + "data": "'$(base64 $B64FLAGS $IMG_PATH)'" + } + } + ] + }] + }' 2> /dev/null +# [END text_gen_multimodal_one_image_prompt] + +echo "[START text_gen_multimodal_one_image_prompt_streaming]" +# [START text_gen_multimodal_one_image_prompt_streaming] +curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?key=$GOOGLE_API_KEY" \ + -H 'Content-Type: application/json' \ + -X POST \ + -d '{ + "contents": [{ + "parts":[ + {"text": "Tell me about this instrument"}, + { + "inline_data": { + "mime_type":"image/jpeg", + "data": "'$(base64 $B64FLAGS $IMG_PATH)'" + } + } + ] + }] + }' 2> /dev/null +# [END text_gen_multimodal_one_image_prompt_streaming] + +echo "[START text_gen_multimodal_audio]" +# [START text_gen_multimodal_audio] +# Use File API to upload audio data to API request. +MIME_TYPE=$(file -b --mime-type "${AUDIO_PATH}") +NUM_BYTES=$(wc -c < "${AUDIO_PATH}") +DISPLAY_NAME=AUDIO + +tmp_header_file=upload-header.tmp + +# Initial resumable request defining metadata. +# The upload url is in the response headers dump them to a file. +curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \ + -D upload-header.tmp \ + -H "X-Goog-Upload-Protocol: resumable" \ + -H "X-Goog-Upload-Command: start" \ + -H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \ + -H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \ + -H "Content-Type: application/json" \ + -d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2> /dev/null + +upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r") +rm "${tmp_header_file}" + +# Upload the actual bytes. +curl "${upload_url}" \ + -H "Content-Length: ${NUM_BYTES}" \ + -H "X-Goog-Upload-Offset: 0" \ + -H "X-Goog-Upload-Command: upload, finalize" \ + --data-binary "@${AUDIO_PATH}" 2> /dev/null > file_info.json + +file_uri=$(jq ".file.uri" file_info.json) +echo file_uri=$file_uri + +curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \ + -H 'Content-Type: application/json' \ + -X POST \ + -d '{ + "contents": [{ + "parts":[ + {"text": "Please describe this file."}, + {"file_data":{"mime_type": "audio/mpeg", "file_uri": '$file_uri'}}] + }] + }' 2> /dev/null > response.json + +cat response.json +echo + +jq ".candidates[].content.parts[].text" response.json +# [END text_gen_multimodal_audio] + +echo "[START text_gen_multimodal_video_prompt]" +# [START text_gen_multimodal_video_prompt] +# Use File API to upload audio data to API request. +MIME_TYPE=$(file -b --mime-type "${VIDEO_PATH}") +NUM_BYTES=$(wc -c < "${VIDEO_PATH}") +DISPLAY_NAME=VIDEO + +# Initial resumable request defining metadata. +# The upload url is in the response headers dump them to a file. +curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \ + -D upload-header.tmp \ + -H "X-Goog-Upload-Protocol: resumable" \ + -H "X-Goog-Upload-Command: start" \ + -H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \ + -H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \ + -H "Content-Type: application/json" \ + -d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2> /dev/null + +upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r") +rm "${tmp_header_file}" + +# Upload the actual bytes. +curl "${upload_url}" \ + -H "Content-Length: ${NUM_BYTES}" \ + -H "X-Goog-Upload-Offset: 0" \ + -H "X-Goog-Upload-Command: upload, finalize" \ + --data-binary "@${VIDEO_PATH}" 2> /dev/null > file_info.json + +file_uri=$(jq ".file.uri" file_info.json) +echo file_uri=$file_uri + +state=$(jq ".file.state" file_info.json) +echo state=$state + +name=$(jq ".file.name" file_info.json) +echo name=$name + +while [[ "($state)" = *"PROCESSING"* ]]; +do + echo "Processing video..." + sleep 5 + # Get the file of interest to check state + curl https://generativelanguage.googleapis.com/v1beta/files/$name > file_info.json + state=$(jq ".file.state" file_info.json) +done + +curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \ + -H 'Content-Type: application/json' \ + -X POST \ + -d '{ + "contents": [{ + "parts":[ + {"text": "Please describe this file."}, + {"file_data":{"mime_type": "video/mp4", "file_uri": '$file_uri'}}] + }] + }' 2> /dev/null > response.json + +cat response.json +echo + +jq ".candidates[].content.parts[].text" response.json +# [END text_gen_multimodal_video_prompt] + +echo "[START text_gen_multimodal_video_prompt_streaming]" +# [START text_gen_multimodal_video_prompt_streaming] +# Use File API to upload audio data to API request. +MIME_TYPE=$(file -b --mime-type "${VIDEO_PATH}") +NUM_BYTES=$(wc -c < "${VIDEO_PATH}") +DISPLAY_NAME=VIDEO_PATH + +# Initial resumable request defining metadata. +# The upload url is in the response headers dump them to a file. +curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \ + -D upload-header.tmp \ + -H "X-Goog-Upload-Protocol: resumable" \ + -H "X-Goog-Upload-Command: start" \ + -H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \ + -H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \ + -H "Content-Type: application/json" \ + -d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2> /dev/null + +upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r") +rm "${tmp_header_file}" + +# Upload the actual bytes. +curl "${upload_url}" \ + -H "Content-Length: ${NUM_BYTES}" \ + -H "X-Goog-Upload-Offset: 0" \ + -H "X-Goog-Upload-Command: upload, finalize" \ + --data-binary "@${VIDEO_PATH}" 2> /dev/null > file_info.json + +file_uri=$(jq ".file.uri" file_info.json) +echo file_uri=$file_uri + +state=$(jq ".file.state" file_info.json) +echo state=$state + +while [[ "($state)" = *"PROCESSING"* ]]; +do + echo "Processing video..." + sleep 5 + # Get the file of interest to check state + curl https://generativelanguage.googleapis.com/v1beta/files/$name > file_info.json + state=$(jq ".file.state" file_info.json) +done + +curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?key=$GOOGLE_API_KEY" \ + -H 'Content-Type: application/json' \ + -X POST \ + -d '{ + "contents": [{ + "parts":[ + {"text": "Please describe this file."}, + {"file_data":{"mime_type": "video/mp4", "file_uri": '$file_uri'}}] + }] + }' 2> /dev/null > response.json + +cat response.json +echo +# [END text_gen_multimodal_video_prompt_streaming] \ No newline at end of file