# Example app configuration.
#
# Rename this file to app_config.env and place it in the folder config/
# (i.e. it will be located at app_base_folder/config/app_config.env).
# The app will then automatically load in these variables at startup.
# See tools/config.py for all the possible config variables you can set,
# or src/app_settings.qmd for descriptions.
#
# Format: one KEY=value per line, comments on their own line above the key
# they describe, and each key listed only once in this file.
#
# Below are some suggested config variables to start.

# General app run options
# Only needed if Tesseract is in a custom folder; not needed if it is in PATH.
TESSERACT_FOLDER=tesseract/
# Only needed if Poppler is in a custom folder; not needed if it is in PATH.
POPPLER_FOLDER=poppler/poppler-24.02.0/Library/bin/
GRADIO_SERVER_NAME=127.0.0.1
GRADIO_SERVER_PORT=7860
USER_GUIDE_URL=
RUN_FASTAPI=False
FAVICON_PATH=favicon.png
INTRO_TEXT=intros/short_intro.txt

# GUI options
SHOW_QUICKSTART=False
SHOW_SUMMARISATION=True
SHOW_EXAMPLES=True
SHOW_DIFFICULT_OCR_EXAMPLES=True
SHOW_LANGUAGE_SELECTION=True
SHOW_LOCAL_OCR_MODEL_OPTIONS=True
SHOW_ALL_OUTPUTS_IN_OUTPUT_FOLDER=True
SHOW_PII_IDENTIFICATION_OPTIONS=True
SHOW_LOCAL_PII_DETECTION_OPTIONS=True
SHOW_OCR_GUI_OPTIONS=True
EXTRACTION_AND_PII_OPTIONS_OPEN_BY_DEFAULT=True

# Model / redaction process options
DEFAULT_LOCAL_OCR_MODEL=tesseract
# This key was previously listed twice in this section (False, then True).
# Only the later value is kept, since a last-wins loader would have used it.
OVERWRITE_EXISTING_OCR_RESULTS=True
# Whether to apply corrections to input images before processing.
# Will slow down redaction processes.
PREPROCESS_LOCAL_OCR_IMAGES=False
# How many workers should be working in parallel to run various text
# extraction/redaction tasks. Adjust depending on how many CPUs your computer has.
MAX_WORKERS=4
EFFICIENT_OCR=True
INCLUDE_OCR_VISUALISATION_IN_OUTPUT_FILES=True

# Redaction box appearance
CUSTOM_BOX_COLOUR=(128, 128, 128)
USE_GUI_BOX_COLOURS_FOR_OUTPUTS=False

# Image save options
SAVE_PAGE_OCR_VISUALISATIONS=True
SAVE_PREPROCESS_IMAGES=True

# Saving and logging variables
SAVE_LOGS_TO_CSV=True
# Save outputs into user session folders.
SESSION_OUTPUT_FOLDER=True
DISPLAY_FILE_NAMES_IN_LOGS=False

# PaddleOCR
SHOW_PADDLE_MODEL_OPTIONS=False
LOAD_PADDLE_AT_STARTUP=False
# Number of simultaneous workers for Paddle OCR tasks. Generally advised to
# keep at 1, but may work with 2 or more depending on your system.
# NOTE(review): the value below is 4 although the guidance above suggests 1 - confirm.
PADDLE_MAX_WORKERS=4

# GUI show VLM/LLM models
SHOW_HYBRID_MODELS=False
SHOW_CUSTOM_VLM_ENTITIES=False
SHOW_VLM_MODEL_OPTIONS=True
SHOW_INFERENCE_SERVER_PII_OPTIONS=False
SHOW_INFERENCE_SERVER_VLM_OPTIONS=False
SHOW_TRANSFORMERS_LLM_PII_DETECTION_OPTIONS=False

# VLM using Transformers options
SELECTED_LOCAL_TRANSFORMERS_VLM_MODEL=Qwen3.5-9B
QUANTISE_VLM_MODELS=False
USE_TRANSFORMERS_VLM_MODEL_AS_LLM=True
LOCAL_TRANSFORMERS_LLM_PII_MODEL_CHOICE=None
QUANTISE_TRANSFORMERS_LLM_MODELS=False
LOAD_TRANSFORMERS_LLM_PII_MODEL_AT_START=False
LOAD_TRANSFORMERS_VLM_MODEL_AT_START=True

# VLM using inference server options (vLLM / Llama.cpp server)
# NOTE(review): this looks like a machine-specific LAN address - replace it
# with the URL of your own inference server.
INFERENCE_SERVER_API_URL=http://192.168.0.220:8080
USE_LLAMA_SWAP=True
INFERENCE_SERVER_LLM_PII_MODEL_CHOICE=qwen_3_5_27b

# General VLM / LLM options
VLM_DISABLE_QWEN3_5_THINKING=True
LLM_MAX_NEW_TOKENS=8192
# Which model type to use to do face / signature detection.
# Can choose from "transformers_vlm", "inference_vlm", "bedrock_vlm".
CUSTOM_VLM_BACKEND=bedrock_vlm

# AWS related variables
# Set to False if you don't want to run AWS functions. You can remove all the
# environment variables in the following section if you don't want to use them.
RUN_AWS_FUNCTIONS=True
AWS_REGION=example-region
DOCUMENT_REDACTION_BUCKET=example-bucket
SHOW_AWS_TEXT_EXTRACTION_OPTIONS=True
SHOW_AWS_PII_DETECTION_OPTIONS=True
SHOW_AWS_EXAMPLES=True
RUN_ALL_EXAMPLES_THROUGH_AWS=True
SAVE_LOGS_TO_DYNAMODB=True
ACCESS_LOG_DYNAMODB_TABLE_NAME=example-dynamodb-access-log
USAGE_LOG_DYNAMODB_TABLE_NAME=example-dynamodb-usage
FEEDBACK_LOG_DYNAMODB_TABLE_NAME=example-dynamodb-feedback

# AWS Textract options
# This key was previously also listed under "GUI options" with the value False.
# Only this later value is kept. If you remove the AWS settings, consider
# setting it to False.
SHOW_WHOLE_DOCUMENT_TEXTRACT_CALL_OPTIONS=True
LOAD_PREVIOUS_TEXTRACT_JOBS_S3=True
TEXTRACT_WHOLE_DOCUMENT_ANALYSIS_BUCKET=example-bucket-output
INCLUDE_FORM_EXTRACTION_TEXTRACT_OPTION=False
INCLUDE_LAYOUT_EXTRACTION_TEXTRACT_OPTION=False
INCLUDE_TABLE_EXTRACTION_TEXTRACT_OPTION=False
# Needs a VLM option available to work.
INCLUDE_FACE_IDENTIFICATION_TEXTRACT_OPTION=False

# AWS VLM / LLM options
SHOW_BEDROCK_VLM_MODELS=False
SHOW_AWS_BEDROCK_LLM_MODELS=False
HYBRID_TEXTRACT_BEDROCK_VLM=False
CLOUD_LLM_PII_MODEL_CHOICE=amazon.nova-pro-v1:0
# Alternative: amazon.nova-pro-v1:0
CLOUD_LLM_PII_CUSTOM_INSTRUCTIONS_MODEL_CHOICE=anthropic.claude-sonnet-4-6
# Other possible values: anthropic.claude-sonnet-4-6, qwen.qwen3-vl-235b-a22b
CLOUD_VLM_MODEL_CHOICE=amazon.nova-pro-v1:0
# NOTE(review): the next setting appears to have been commented out in the
# original file - uncomment it if a summarisation model should be set here.
# CLOUD_SUMMARISATION_MODEL_CHOICE=amazon.nova-lite-v1:0

# Cost code related variables
SHOW_COSTS=True
GET_COST_CODES=True
COST_CODES_PATH=config/cost_codes.csv
ENFORCE_COST_CODES=True
DEFAULT_COST_CODE=example_cost_code

# S3 cost codes
S3_COST_CODES_PATH=cost_codes.csv