Create a BigQuery Table Using JSON Schema and Python

In this tutorial, we will learn how to create a new BigQuery table by defining its structure in a JSON schema file and using Python to initiate the creation. This is considered a best practice for managing complex table structures.

Step 1: Create a JSON Schema File

Save the following content as schema.json. It defines three columns: full_name, age, and city.

JSON

[
  {
    "name": "full_name",
    "type": "STRING",
    "mode": "REQUIRED"
  },
  {
    "name": "age",
    "type": "INTEGER",
    "mode": "NULLABLE"
  },
  {
    "name": "city",
    "type": "STRING",
    "mode": "NULLABLE"
  }
]    

Step 2: Python Code to Create Table

We will use the schema_from_json method of the bigquery.Client to load the schema file.

Python

# Import the packages
from dotenv import load_dotenv
import os
from google.oauth2 import service_account
from google.cloud import bigquery

def main():
    """Create a BigQuery table whose schema is defined in a JSON file.

    Reads the service-account key path and project id from environment
    variables (loaded from a ``.env`` file), builds an authenticated
    BigQuery client, converts ``schema.json`` into SchemaField objects,
    and creates the target table.

    Raises:
        RuntimeError: If a required environment variable is not set.
    """
    # Loads environment variables from a .env file into os.environ
    load_dotenv()

    # Required configuration from the environment
    credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    project_name = os.getenv("project_id")

    # Fail fast with a clear message instead of an obscure TypeError
    # raised deep inside the google-auth library when the path is None.
    if not credentials_path:
        raise RuntimeError("Environment variable GOOGLE_APPLICATION_CREDENTIALS is not set.")
    if not project_name:
        raise RuntimeError("Environment variable project_id is not set.")

    # Get the service account credentials from the key file
    credentials = service_account.Credentials.from_service_account_file(credentials_path)

    # Create the BigQuery client bound to the target project
    client = bigquery.Client(credentials=credentials, project=project_name)

    # Fully qualified table id: "your-project.your_dataset.your_table_name"
    table_id = "ashishcoder.Coding_Dataset.new_table"

    # Path to schema file (a JSON array of field objects)
    schema_file_path = "schema.json"

    # Converts the JSON schema into a list of SchemaField objects
    schema = client.schema_from_json(schema_file_path)

    # Initialize the local table object with the loaded schema
    table = bigquery.Table(table_id, schema=schema)

    # Create the table in BigQuery; exists_ok=True makes this a no-op
    # (no error) if the table already exists.
    table = client.create_table(table, exists_ok=True)

    # Success message
    print(f"Table {table_id} created successfully with schema from {schema_file_path}.")

if __name__ == "__main__":
    main()

Key Steps Explained

Note: Ensure your JSON schema follows the BigQuery API format, which is an array of field objects.

Create a Clustered Table

We can create a clustered table using Python to improve query performance and cost efficiency.

Example: Let’s create a clustered table in BigQuery using the JSON schema.

Python

# Import the packages
from dotenv import load_dotenv
import os
from google.oauth2 import service_account
from google.cloud import bigquery

def main():
    """Create a clustered BigQuery table from a JSON schema file.

    Loads credentials and the project id from environment variables,
    builds the table from ``schema.json``, sets ``clustering_fields``
    so rows are physically clustered by ``city``, and creates the
    table in BigQuery.

    Raises:
        RuntimeError: If a required environment variable is not set.
    """
    # Load environment variables from a .env file into os.environ
    load_dotenv()

    credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    project_name = os.getenv("project_id")

    # Fail fast with a clear message instead of an obscure TypeError
    # raised deep inside the google-auth library when the path is None.
    if not credentials_path:
        raise RuntimeError("Environment variable GOOGLE_APPLICATION_CREDENTIALS is not set.")
    if not project_name:
        raise RuntimeError("Environment variable project_id is not set.")

    # Build credentials from the service-account key file
    credentials = service_account.Credentials.from_service_account_file(credentials_path)

    # Authenticated client bound to the target project
    client = bigquery.Client(credentials=credentials, project=project_name)

    # Fully qualified table id: "project.dataset.table"
    table_id = "ashishcoder.Coding_Dataset.new_clustered_table"

    schema_file_path = "schema.json"

    # Load schema from JSON (converted to SchemaField objects)
    schema = client.schema_from_json(schema_file_path)

    # Create the local table object
    table = bigquery.Table(table_id, schema=schema)

    # Specify clustering fields; BigQuery will co-locate rows with the
    # same "city" value to reduce scanned bytes on filtered queries.
    table.clustering_fields = ["city"]

    # Create the table; exists_ok=True avoids an error if it already exists
    table = client.create_table(table, exists_ok=True)

    print(f"Created table {table_id}.")

if __name__ == "__main__":
    main()