Working with JSON in Python
Python provides the built-in json module for working with JSON data.
This guide covers parsing, serialization, validation, and best practices for handling JSON in Python.
Parsing JSON
Use json.loads() to parse a JSON string into a Python dictionary:
import json
# Parse a JSON string
json_string = '{"name": "John", "age": 30, "city": "New York"}'
data = json.loads(json_string)
print(data["name"]) # John
print(data["age"]) # 30
print(type(data)) # <class 'dict'>
# Parse JSON array
json_array = '[1, 2, 3, "four", true, null]'
items = json.loads(json_array)
print(items) # [1, 2, 3, 'four', True, None]
Serializing to JSON
Use json.dumps() to convert Python objects to JSON strings:
import json
user = {
"name": "Jane",
"age": 25,
"is_active": True,
"hobbies": ["reading", "coding"],
"address": None
}
# Basic serialization
json_str = json.dumps(user)
print(json_str)
# {"name": "Jane", "age": 25, "is_active": true, "hobbies": ["reading", "coding"], "address": null}
# Pretty print with indentation
pretty_json = json.dumps(user, indent=2)
print(pretty_json)
# {
# "name": "Jane",
# "age": 25,
# "is_active": true,
# "hobbies": [
# "reading",
# "coding"
# ],
# "address": null
# }
# Sort keys alphabetically
sorted_json = json.dumps(user, indent=2, sort_keys=True)
Working with JSON Files
import json
# Read JSON from file
with open('data.json', 'r', encoding='utf-8') as f:
data = json.load(f)
# Write JSON to file
user = {"name": "John", "age": 30}
with open('output.json', 'w', encoding='utf-8') as f:
json.dump(user, f, indent=2, ensure_ascii=False)
# Append to JSON file (read, modify, write)
with open('data.json', 'r+', encoding='utf-8') as f:
data = json.load(f)
data['new_key'] = 'new_value'
f.seek(0)
json.dump(data, f, indent=2)
f.truncate()
Validating JSON
import json
def is_valid_json(json_string):
    """Return True if ``json_string`` parses as JSON, otherwise False.

    Malformed JSON text, bytes that cannot be decoded, and input that is
    not ``str``/``bytes``/``bytearray`` (for example ``None``) all yield
    False instead of raising.
    """
    try:
        json.loads(json_string)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # raised for undecodable bytes; TypeError covers non-string input.
        return False
    return True
def validate_json(json_string):
    """Parse ``json_string`` and describe the outcome as a dict.

    On success the dict holds ``"valid": True``, the parsed value under
    ``"data"`` and the name of its Python type under ``"type"``.
    On ``json.JSONDecodeError`` it holds ``"valid": False`` together with
    the error message and the line, column and character position that
    the decoder reported.
    """
    try:
        parsed = json.loads(json_string)
    except json.JSONDecodeError as exc:
        outcome = {
            "valid": False,
            "error": str(exc),
            "line": exc.lineno,
            "column": exc.colno,
            "position": exc.pos,
        }
    else:
        outcome = {
            "valid": True,
            "data": parsed,
            "type": type(parsed).__name__,
        }
    return outcome
# Usage
result = validate_json('{"name": "John"}')
print(result) # {"valid": True, "data": {"name": "John"}, "type": "dict"}
result = validate_json('{invalid}')
print(result) # {"valid": False, "error": "...", "line": 1, "column": 2, "position": 1}
Custom JSON Encoders
Handle custom Python objects by creating a custom encoder:
import json
from datetime import datetime, date
from decimal import Decimal
from uuid import UUID
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that also handles a few common non-JSON Python types.

    Dates and datetimes become ISO-8601 strings, ``Decimal`` becomes a
    float, ``UUID`` becomes its string form, ``set`` becomes a list and
    ``bytes`` are decoded as UTF-8 text. Anything else is passed to the
    base class.
    """

    def default(self, obj):
        """Return a JSON-serializable replacement for ``obj``."""
        # Checked in order; the first matching entry wins.
        converters = (
            ((datetime, date), lambda value: value.isoformat()),
            (Decimal, float),
            (UUID, str),
            (set, list),
            (bytes, lambda value: value.decode('utf-8')),
        )
        for handled_types, convert in converters:
            if isinstance(obj, handled_types):
                return convert(obj)
        # Unknown type: defer to the base implementation.
        return super().default(obj)
# Usage
data = {
"timestamp": datetime.now(),
"price": Decimal("19.99"),
"tags": {"python", "json"},
"id": UUID("12345678-1234-5678-1234-567812345678")
}
json_str = json.dumps(data, cls=CustomEncoder, indent=2)
print(json_str)
# Using default parameter for simple cases
json.dumps(data, default=str) # Convert all unknown types to string
Custom Decoders (Object Hook)
import json
from datetime import datetime
def datetime_decoder(obj):
    """Replace ISO-format date strings in ``obj`` with datetime objects.

    Intended as an ``object_hook``: every string value that
    ``datetime.fromisoformat`` accepts is swapped for the parsed datetime;
    all other values are left untouched. ``obj`` is modified in place and
    returned.
    """
    converted = {}
    for field, raw in obj.items():
        if not isinstance(raw, str):
            continue
        try:
            converted[field] = datetime.fromisoformat(raw)
        except ValueError:
            # Not an ISO date string; keep the original value.
            continue
    obj.update(converted)
    return obj
json_str = '{"name": "John", "created_at": "2024-01-15T10:30:00"}'
data = json.loads(json_str, object_hook=datetime_decoder)
print(data["created_at"]) # datetime object
print(type(data["created_at"])) # <class 'datetime.datetime'>
Working with APIs (requests library)
import requests
# GET request - response is automatically parsed
response = requests.get('https://api.example.com/data')
data = response.json() # Automatically parses JSON
# POST request with JSON body
payload = {"name": "John", "email": "john@example.com"}
response = requests.post(
'https://api.example.com/users',
json=payload # Automatically serializes to JSON
)
result = response.json()
# With error handling
def fetch_json(url):
    """Fetch ``url`` and return its decoded JSON body, or None on failure.

    A failed request (connection problem, timeout, error status from
    ``raise_for_status``) and an undecodable body are each reported with
    ``print`` and result in ``None``.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
        return None
    # Decode in its own try block: since requests 2.27 a bad body raises
    # requests.exceptions.JSONDecodeError, which is also a RequestException,
    # so a shared handler would report it as a request error. Every JSON
    # decode error response.json() raises is a ValueError subclass.
    try:
        return response.json()
    except ValueError as e:
        print(f"JSON decode error: {e}")
        return None
JSON Schema Validation
# pip install jsonschema
from jsonschema import validate, ValidationError
schema = {
"type": "object",
"properties": {
"name": {"type": "string", "minLength": 1},
"age": {"type": "integer", "minimum": 0},
"email": {"type": "string", "format": "email"}
},
"required": ["name", "email"]
}
def validate_user(data):
    """Validate ``data`` against the module-level user ``schema``.

    Returns ``{"valid": True}`` when the data conforms. Otherwise returns
    a dict with ``"valid": False``, the validator's message under
    ``"error"`` and the path to the offending value under ``"path"``.
    """
    # Imported locally so the snippet's existing import line is untouched.
    from jsonschema import FormatChecker

    try:
        # "format" keywords such as "email" are not enforced unless a
        # format checker is supplied, so pass one explicitly.
        validate(instance=data, schema=schema, format_checker=FormatChecker())
        return {"valid": True}
    except ValidationError as e:
        return {"valid": False, "error": e.message, "path": list(e.path)}
# Valid data
print(validate_user({"name": "John", "email": "john@example.com", "age": 30}))
# {"valid": True}
# Invalid data
print(validate_user({"name": "", "email": "invalid"}))
# {"valid": False, "error": "...", "path": [...]}
Performance Tips
import json
# For large JSON files, use iterative parsing
# pip install ijson
import ijson
def parse_large_json(filename):
    """Lazily yield the objects ijson finds under the ``items.item`` prefix.

    The file is opened in binary mode and handed to ``ijson.items``.
    Because this is a generator, the file stays open until iteration
    finishes or the generator is closed.
    """
    with open(filename, 'rb') as stream:
        yield from ijson.items(stream, 'items.item')
# Use orjson for better performance
# pip install orjson
import orjson
# 3-10x faster than standard json
data = orjson.loads(b'{"name": "John"}')
json_bytes = orjson.dumps(data, option=orjson.OPT_INDENT_2)
# Use ujson for faster parsing
# pip install ujson
import ujson
data = ujson.loads('{"name": "John"}')
Common Patterns
import json
# Merge JSON objects
def merge_json(*args):
    """Shallow-merge the given objects into one new dict.

    Arguments are applied left to right with ``dict.update``, so a later
    argument overrides an earlier one on duplicate keys. With no
    arguments an empty dict is returned.
    """
    merged = dict()
    for source in args:
        merged.update(source)
    return merged
merged = merge_json({"a": 1}, {"b": 2}, {"c": 3})
print(merged) # {"a": 1, "b": 2, "c": 3}
# Deep merge
def deep_merge(base, update):
    """Return a new dict with ``update`` recursively merged over ``base``.

    When both sides hold a dict under the same key, those dicts are merged
    recursively; in every other case the value from ``update`` wins.
    Neither argument is modified. The top level is only a shallow copy,
    so nested values the merge does not touch are shared with ``base``,
    and values taken from ``update`` are shared with ``update``.
    """
    merged = base.copy()
    for name, incoming in update.items():
        current = merged.get(name)
        both_dicts = isinstance(current, dict) and isinstance(incoming, dict)
        merged[name] = deep_merge(current, incoming) if both_dicts else incoming
    return merged
# Convert between JSON and Python dataclasses
from dataclasses import dataclass, asdict
@dataclass
class User:
    """User record used to demonstrate dataclass-to-JSON conversion."""
    # Field order is the positional order of the generated __init__.
    name: str
    age: int
    email: str
user = User("John", 30, "john@example.com")
json_str = json.dumps(asdict(user))
print(json_str) # {"name": "John", "age": 30, "email": "john@example.com"}
Best Practices
- Always handle JSONDecodeError when parsing untrusted input
- Use ensure_ascii=False for non-ASCII characters
- Set encoding='utf-8' when working with files
- Use the indent parameter for human-readable output
- Consider orjson or ujson for performance-critical applications
- Use JSON Schema for validating complex data structures