Generate text from an image

This sample demonstrates how to use the Gemini model to generate text from an image.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

C++

Before trying this sample, follow the C++ setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI C++ API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

namespace vertex_ai = ::google::cloud::aiplatform_v1;
namespace vertex_ai_proto = ::google::cloud::aiplatform::v1;
[](std::string const& project_id, std::string const& location_id,
   std::string const& model, std::string const& prompt,
   std::string const& mime_type, std::string const& file_uri) {
  google::cloud::Location location(project_id, location_id);
  auto client = vertex_ai::PredictionServiceClient(
      vertex_ai::MakePredictionServiceConnection(location.location_id()));

  vertex_ai_proto::GenerateContentRequest request;
  request.set_model(location.FullName() + "/publishers/google/models/" +
                    model);
  auto generation_config = request.mutable_generation_config();
  generation_config->set_temperature(0.4f);
  generation_config->set_top_k(32);
  generation_config->set_top_p(1);
  generation_config->set_max_output_tokens(2048);

  auto contents = request.add_contents();
  contents->set_role("user");
  contents->add_parts()->set_text(prompt);
  auto image_part = contents->add_parts();
  image_part->mutable_file_data()->set_file_uri(file_uri);
  image_part->mutable_file_data()->set_mime_type(mime_type);

  auto response = client.GenerateContent(request);
  if (!response) throw std::move(response).status();

  for (auto const& candidate : response->candidates()) {
    for (auto const& p : candidate.content().parts()) {
      std::cout << p.text() << "\n";
    }
  }
}

Java

Before trying this sample, follow the Java setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Java API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import com.google.cloud.vertexai.VertexAI;
import com.google.cloud.vertexai.api.GenerateContentResponse;
import com.google.cloud.vertexai.generativeai.ContentMaker;
import com.google.cloud.vertexai.generativeai.GenerativeModel;
import com.google.cloud.vertexai.generativeai.PartMaker;
import java.io.IOException;

public class Quickstart {

  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-google-cloud-project-id";
    String location = "us-central1";
    String modelName = "gemini-1.5-flash-001";

    String output = quickstart(projectId, location, modelName);
    System.out.println(output);
  }

  // Analyzes the provided Multimodal input.
  public static String quickstart(String projectId, String location, String modelName)
      throws IOException {
    // Initialize client that will be used to send requests. This client only needs
    // to be created once, and can be reused for multiple requests.
    try (VertexAI vertexAI = new VertexAI(projectId, location)) {
      String imageUri = "gs://generativeai-downloads/images/scones.jpg";

      GenerativeModel model = new GenerativeModel(modelName, vertexAI);
      GenerateContentResponse response = model.generateContent(ContentMaker.fromMultiModalData(
          PartMaker.fromMimeTypeAndData("image/png", imageUri),
          "What's in this photo"
      ));

      return response.toString();
    }
  }
}

Node.js

Before trying this sample, follow the Node.js setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Node.js API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

const {VertexAI} = require('@google-cloud/vertexai');

/**
 * TODO(developer): Update these variables before running the sample.
 */
async function createNonStreamingMultipartContent(
  projectId = 'PROJECT_ID',
  location = 'us-central1',
  model = 'gemini-1.5-flash-001',
  image = 'gs://generativeai-downloads/images/scones.jpg',
  mimeType = 'image/jpeg'
) {
  // Initialize Vertex with your Cloud project and location
  const vertexAI = new VertexAI({project: projectId, location: location});

  // Instantiate the model
  const generativeVisionModel = vertexAI.getGenerativeModel({
    model: model,
  });

  // For images, the SDK supports both Google Cloud Storage URI and base64 strings
  const filePart = {
    fileData: {
      fileUri: image,
      mimeType: mimeType,
    },
  };

  const textPart = {
    text: 'what is shown in this image?',
  };

  const request = {
    contents: [{role: 'user', parts: [filePart, textPart]}],
  };

  console.log('Prompt Text:');
  console.log(request.contents[0].parts[1].text);

  console.log('Non-Streaming Response Text:');

  // Generate a response
  const response = await generativeVisionModel.generateContent(request);

  // Select the text from the response
  const fullTextResponse =
    response.response.candidates[0].content.parts[0].text;

  console.log(fullTextResponse);
}

Python

Before trying this sample, follow the Python setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Python API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import vertexai

from vertexai.generative_models import GenerativeModel, Part

# TODO(developer): Update and un-comment below line
# PROJECT_ID = "your-project-id"
vertexai.init(project=PROJECT_ID, location="us-central1")

model = GenerativeModel("gemini-1.5-flash-002")

response = model.generate_content(
    [
        Part.from_uri(
            "gs://cloud-samples-data/generative-ai/image/scones.jpg",
            mime_type="image/jpeg",
        ),
        "What is shown in this image?",
    ]
)

print(response.text)
# That's a lovely overhead shot of a rustic-style breakfast or brunch spread.
# Here's what's in the image:
# * **Blueberry scones:** Several freshly baked blueberry scones are arranged on parchment paper.
# They look crumbly and delicious.
# ...

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.