Working with JSON in Python

Python provides the built-in json module for working with JSON data. This guide covers parsing, serialization, validation, and best practices for handling JSON in Python.

Parsing JSON

Use json.loads() to parse a JSON string into the corresponding Python object (a JSON object becomes a dict, a JSON array becomes a list):

import json

# Parse a JSON string
json_string = '{"name": "John", "age": 30, "city": "New York"}'
data = json.loads(json_string)

print(data["name"])  # John
print(data["age"])   # 30
print(type(data))    # <class 'dict'>

# Parse JSON array
json_array = '[1, 2, 3, "four", true, null]'
items = json.loads(json_array)
print(items)  # [1, 2, 3, 'four', True, None]

Serializing to JSON

Use json.dumps() to convert Python objects to JSON strings:

import json

user = {
    "name": "Jane",
    "age": 25,
    "is_active": True,
    "hobbies": ["reading", "coding"],
    "address": None
}

# Basic serialization
json_str = json.dumps(user)
print(json_str)
# {"name": "Jane", "age": 25, "is_active": true, "hobbies": ["reading", "coding"], "address": null}

# Pretty print with indentation
pretty_json = json.dumps(user, indent=2)
print(pretty_json)
# {
#   "name": "Jane",
#   "age": 25,
#   "is_active": true,
#   "hobbies": [
#     "reading",
#     "coding"
#   ],
#   "address": null
# }

# Sort keys alphabetically
sorted_json = json.dumps(user, indent=2, sort_keys=True)

Working with JSON Files

import json

# Read JSON from file
with open('data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Write JSON to file
user = {"name": "John", "age": 30}
with open('output.json', 'w', encoding='utf-8') as f:
    json.dump(user, f, indent=2, ensure_ascii=False)

# Append to JSON file (read, modify, write)
with open('data.json', 'r+', encoding='utf-8') as f:
    data = json.load(f)
    data['new_key'] = 'new_value'
    f.seek(0)
    json.dump(data, f, indent=2)
    f.truncate()

Validating JSON

import json

def is_valid_json(json_string):
    """Check if a string is valid JSON.

    Args:
        json_string: The text to check. ``json.loads()`` accepts ``str``,
            ``bytes`` and ``bytearray``.

    Returns:
        True if ``json.loads()`` can parse the input, False otherwise.
        Input of an unsupported type (e.g. ``None``) and byte strings that
        cannot be decoded are reported as invalid instead of raising.
    """
    try:
        json.loads(json_string)
    except (TypeError, ValueError):
        # TypeError: the input is not str/bytes/bytearray.
        # ValueError: covers json.JSONDecodeError (malformed JSON) and
        # UnicodeDecodeError (undecodable bytes), both of which subclass it.
        return False
    return True

def validate_json(json_string):
    """Validate JSON and return detailed error info.

    Args:
        json_string: The text to validate. ``json.loads()`` accepts ``str``,
            ``bytes`` and ``bytearray``.

    Returns:
        On success, a dict with ``"valid": True``, the parsed ``"data"`` and
        the ``"type"`` name of the parsed top-level value.
        On failure, a dict with ``"valid": False``, the ``"error"`` message
        and the ``"line"``, ``"column"`` and ``"position"`` of the problem.
        The three location fields are ``None`` when the input could not be
        parsed at all (wrong input type, or bytes that cannot be decoded),
        because no location is available in that case.
    """
    try:
        data = json.loads(json_string)
    except json.JSONDecodeError as e:
        return {
            "valid": False,
            "error": str(e),
            "line": e.lineno,
            "column": e.colno,
            "position": e.pos
        }
    except (TypeError, ValueError) as e:
        # TypeError: input is not str/bytes/bytearray (e.g. None).
        # ValueError: remaining non-JSONDecodeError cases such as
        # UnicodeDecodeError for undecodable bytes.
        return {
            "valid": False,
            "error": str(e),
            "line": None,
            "column": None,
            "position": None
        }
    # Success path kept outside the try so only json.loads() is guarded.
    return {
        "valid": True,
        "data": data,
        "type": type(data).__name__
    }

# Usage
result = validate_json('{"name": "John"}')
print(result)  # {"valid": True, "data": {"name": "John"}, "type": "dict"}

result = validate_json('{invalid}')
print(result)  # {"valid": False, "error": "...", "line": 1, "column": 2, "position": 1}

Custom JSON Encoders

Handle custom Python objects by creating a custom encoder:

import json
from datetime import datetime, date
from decimal import Decimal
from uuid import UUID

class CustomEncoder(json.JSONEncoder):
    """JSON encoder that also serializes a few common non-JSON types.

    Supported extras: ``datetime``/``date`` (ISO 8601 string), ``Decimal``
    (converted with ``float()``, which may lose precision), ``UUID``
    (string form), ``set`` (list) and ``bytes`` (decoded as UTF-8).
    """

    # (type or tuple of types, converter) pairs, tried in this order.
    _CONVERTERS = (
        ((datetime, date), lambda value: value.isoformat()),
        (Decimal, float),
        (UUID, str),
        (set, list),
        (bytes, lambda value: value.decode('utf-8')),
    )

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        Falls back to the base class, which raises ``TypeError``, when
        *obj* is not one of the supported extra types.
        """
        for handled_types, convert in self._CONVERTERS:
            if isinstance(obj, handled_types):
                return convert(obj)
        return super().default(obj)

# Usage
data = {
    "timestamp": datetime.now(),
    "price": Decimal("19.99"),
    "tags": {"python", "json"},
    "id": UUID("12345678-1234-5678-1234-567812345678")
}

json_str = json.dumps(data, cls=CustomEncoder, indent=2)
print(json_str)

# Using default parameter for simple cases
json.dumps(data, default=str)  # Convert all unknown types to string

Custom Decoders (Object Hook)

import json
from datetime import datetime

def datetime_decoder(obj):
    """Replace ISO-format date strings in *obj* with datetime objects.

    Intended for use as an ``object_hook``: every string value that
    ``datetime.fromisoformat()`` accepts is swapped for the parsed
    datetime; all other values are left untouched. The dict is modified
    in place and also returned.
    """
    # Only existing keys are reassigned, so the dict's size never changes
    # while it is being iterated.
    for field in obj:
        raw = obj[field]
        if not isinstance(raw, str):
            continue
        try:
            parsed = datetime.fromisoformat(raw)
        except ValueError:
            # Not an ISO date string; keep the original text.
            continue
        obj[field] = parsed
    return obj

json_str = '{"name": "John", "created_at": "2024-01-15T10:30:00"}'
data = json.loads(json_str, object_hook=datetime_decoder)
print(data["created_at"])  # datetime object
print(type(data["created_at"]))  # <class 'datetime.datetime'>

Working with APIs (requests library)

import requests

# GET request - call response.json() to parse the JSON body
response = requests.get('https://api.example.com/data')
data = response.json()  # Automatically parses JSON

# POST request with JSON body
payload = {"name": "John", "email": "john@example.com"}
response = requests.post(
    'https://api.example.com/users',
    json=payload  # Automatically serializes to JSON
)
result = response.json()

# With error handling
def fetch_json(url):
    """Fetch *url* with a GET request and return its parsed JSON body.

    Args:
        url: The URL to request.

    Returns:
        The decoded JSON value, or None if the request failed, the server
        answered with an error status, or the body was not valid JSON.
        The failure reason is printed.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return response.json()
    except json.JSONDecodeError as e:
        # This handler must come before RequestException: in recent
        # requests releases the error raised by response.json() subclasses
        # both json.JSONDecodeError and RequestException, so the broader
        # handler would otherwise report it as a request error.
        print(f"JSON decode error: {e}")
        return None
    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
        return None

JSON Schema Validation

# pip install jsonschema
from jsonschema import validate, ValidationError

schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string", "minLength": 1},
        "age": {"type": "integer", "minimum": 0},
        "email": {"type": "string", "format": "email"}
    },
    "required": ["name", "email"]
}

def validate_user(data):
    """Validate *data* against the module-level user ``schema``.

    Returns ``{"valid": True}`` when validation passes; otherwise a dict
    with ``"valid": False``, the validator's ``"error"`` message and the
    ``"path"`` to the offending element as a list.

    NOTE(review): jsonschema's ``validate()`` is called without a
    ``format_checker``, so the schema's ``"format": "email"`` is presumably
    not enforced - confirm against the jsonschema documentation.
    """
    try:
        validate(instance=data, schema=schema)
    except ValidationError as exc:
        failure = {"valid": False, "error": exc.message, "path": list(exc.path)}
        return failure
    return {"valid": True}

# Valid data
print(validate_user({"name": "John", "email": "john@example.com", "age": 30}))
# {"valid": True}

# Invalid data
print(validate_user({"name": "", "email": "invalid"}))
# {"valid": False, "error": "...", "path": [...]}

Performance Tips

import json

# For large JSON files, use iterative parsing
# pip install ijson
import ijson

def parse_large_json(filename):
    """Lazily yield the elements found under the 'items.item' prefix.

    The file is opened in binary mode and handed to ``ijson.items``, and
    each value it produces is yielded in turn. The file stays open until
    the generator is exhausted or closed.
    """
    with open(filename, 'rb') as stream:
        records = ijson.items(stream, 'items.item')
        for record in records:
            yield record

# Use orjson for better performance
# pip install orjson
import orjson

# 3-10x faster than standard json
data = orjson.loads(b'{"name": "John"}')
json_bytes = orjson.dumps(data, option=orjson.OPT_INDENT_2)

# Use ujson for faster parsing
# pip install ujson
import ujson
data = ujson.loads('{"name": "John"}')

Common Patterns

import json

# Merge JSON objects
def merge_json(*args):
    """Shallow-merge any number of mappings into one new dict.

    Arguments are applied left to right, so a key present in several
    inputs takes its value from the last one. The inputs are not modified.
    """
    combined = {}
    for mapping in args:
        combined.update(mapping)
    return combined

merged = merge_json({"a": 1}, {"b": 2}, {"c": 3})
print(merged)  # {"a": 1, "b": 2, "c": 3}

# Deep merge
def deep_merge(base, update):
    """Recursively merge *update* into a copy of *base* and return it.

    Where both sides hold a dict under the same key, the two dicts are
    merged recursively; in every other case the value from *update* wins.
    Neither argument is modified, but only a shallow copy is made at each
    level, so nested values that are not themselves merged are shared with
    the inputs.
    """
    merged = base.copy()
    for name, incoming in update.items():
        existing = merged.get(name)
        both_dicts = isinstance(existing, dict) and isinstance(incoming, dict)
        merged[name] = deep_merge(existing, incoming) if both_dicts else incoming
    return merged

# Convert between JSON and Python dataclasses
from dataclasses import dataclass, asdict

@dataclass
class User:
    """Plain user record used to demonstrate dataclass-to-JSON conversion."""

    name: str
    age: int
    email: str

user = User("John", 30, "john@example.com")
json_str = json.dumps(asdict(user))
print(json_str)  # {"name": "John", "age": 30, "email": "john@example.com"}

Best Practices