Quickstart

Accessing Extract with a trial account

This guide is meant for users who would like to access Extract programmatically. If you do not yet have an account, sign up (opens in a new tab) for access.

To get started, visit the authentication quickstart to learn how to generate access tokens.

Once you have an access token, you can send documents for extraction.

# Submit a document for extraction, then poll until the job is no longer pending.
# NOTE: this snippet assumes `requests` and `time` are already imported and that
# `access_token` and `filename` are defined beforehand; confirm against your setup.
request_url = "https://extract.kensho.com/v2/extractions"
headers = {"Authorization": f"Bearer {access_token}"}
print("Sending a document to extract")
# Keep the file open only for the duration of the upload so the handle is
# always closed, even if the request raises.
with open(filename, "rb") as document:
    response = requests.post(
        request_url,
        files={"file": document},
        # "document_type" should be "broker_research" or "general"
        data={
            "document_type": "general",
            # To OCR all pages in a document, uncomment the following:
            # "ocr": "true",
            # To enable the enhanced table extraction, uncomment the following:
            # "enhanced_table_extraction": "true",
        },
        headers=headers,
    )
# Fail fast if the upload was rejected.
response.raise_for_status()

# The result of a job is fetched from the submission URL plus its request id.
request_id = response.json()["request_id"]
response_url = f"{request_url}/{request_id}"

params = {}
# To include bounding boxes in the output, uncomment the following:
# params["output_format"] = "structured_document_with_locations"

print(f"Waiting for job {request_id}")
response = requests.get(response_url, headers=headers, params=params)

# Poll every two seconds while the service reports the job as pending.
while response.status_code == 200 and response.json()["status"] == "pending":
    time.sleep(2)
    response = requests.get(response_url, headers=headers, params=params)

# Surface HTTP errors from polling instead of exiting silently, mirroring the
# check performed after the upload.
response.raise_for_status()
if response.status_code == 200:
    print(response.json())