Create a BigQuery Table Using JSON Schema and Python
In this tutorial, we will learn how to create a new BigQuery table by defining its structure in a JSON schema file and using Python to initiate the creation. This is considered a best practice for managing complex table structures.
Step 1: Create a JSON Schema File
Save the following content as schema.json. It defines three columns: full_name, age, and city.
JSON
[
{
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED"
},
{
"name": "age",
"type": "INTEGER",
"mode": "NULLABLE"
},
{
"name": "city",
"type": "STRING",
"mode": "NULLABLE"
}
] Step 2: Python Code to Create Table
We will use the from_json_file method of the bigquery.Client.
Python
# Import the packages
from dotenv import load_dotenv
import os
from google.oauth2 import service_account
from google.cloud import bigquery
def main():
# Loads environment variables from a .env file
load_dotenv()
# Use environment variables from .env file
credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
project_name = os.getenv("project_id")
# Get the service account credentials
credentials = service_account.Credentials.from_service_account_file(credentials_path)
# Create the BigQuery client
client = bigquery.Client(credentials=credentials, project=project_name)
# table_id = "your-project.your_dataset.your_table_name"
table_id = "ashishcoder.Coding_Dataset.new_table"
# Path to schema file
schema_file_path = "schema.json"
# Load the schema from a JSON file
# Converts JSON schema into SchemaField objects
schema = client.schema_from_json(schema_file_path)
# Initialize the table object
table = bigquery.Table(table_id, schema=schema)
# Create the table in BigQuery
table = client.create_table(table, exists_ok=True)
# Success message
print(f"Table {table_id} created successfully with schema from {schema_file_path}.")
if __name__ == "__main__":
main() Key Steps Explained
- client.schema_from_json(path) parses a local JSON schema file into a BigQuery schema.
- bigquery.Table(table_id, schema) associates the schema with a table definition.
- client.create_table(table) sends the table creation request to BigQuery.
Note: Ensure your JSON schema follows the BigQuery API format, which is an array of field objects.
Create a Clustered Table
We can create a clustered table using Python to improve query performance and cost efficiency.
Example: Let’s create a clustered table in BigQuery using the JSON schema.
Python
# Import the packages
from dotenv import load_dotenv
import os
from google.oauth2 import service_account
from google.cloud import bigquery
def main():
load_dotenv()
credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
project_name = os.getenv("project_id")
credentials = service_account.Credentials.from_service_account_file(credentials_path)
client = bigquery.Client(credentials=credentials, project=project_name)
table_id = "ashishcoder.Coding_Dataset.new_clustered_table"
schema_file_path = "schema.json"
# Load schema from JSON
schema = client.schema_from_json(schema_file_path)
# Create table object
table = bigquery.Table(table_id, schema=schema)
# Specify clustering fields
table.clustering_fields = ["city"]
# Create table
table = client.create_table(table, exists_ok=True)
print(f"Created table {table_id}.")
if __name__ == "__main__":
main() - Clustering organizes data based on selected columns.
- Improves query performance when filtering by clustered fields.
- Reduces query cost by minimizing scanned data.