---
# Configuration for a document-processing pipeline. Top-level sections:
# processor, sources, partitioning, chunking, embedding,
# destination_connectors, additional_processing.
#
# NOTE(review): the block nesting below was reconstructed from a copy of this
# file that had been collapsed onto a single line; confirm the hierarchy
# against the code that consumes this config.

# General run settings.
processor:
  verbose: true
  output_dir: './output'
  num_processes: 2
  reprocess: false

# Per-source settings. The credential-like values below are placeholders;
# replace them locally and avoid committing real secrets.
sources:
  local:
    recursive: true

  confluence:
    api_token: 'your_confluence_api_token'
    user_email: 'your_email@example.com'
    url: 'https://your-confluence-url.atlassian.net'

  github:
    url: 'owner/repo'
    branch: 'main'

  google_drive:
    service_account_key: 'path/to/service_account_key.json'
    recursive: true
    drive_id: 'your_drive_id'

# Partitioning settings.
partitioning:
  skip_infer_table_types: []
  strategy: 'auto'
  hi_res_model_name: 'yolox'
  ocr_languages: ['eng']
  encoding: 'utf-8'
  fields_include: ['element_id', 'text', 'type', 'metadata', 'embeddings']
  flatten_metadata: false
  metadata_exclude: []
  metadata_include: []
  partition_endpoint: 'http://localhost'
  unstructured_port: 8005
  partition_by_api: false  # set to true if using the API server
  # Placeholder value; quoted so it always loads as a string.
  default_unstructured_api_key: '123456789abcde'

# Chunking settings.
chunking:
  enabled: true
  strategy: 'by_title'
  chunk_max_characters: 1500
  chunk_overlap: 300
  combine_under_n_chars: 1500

# Embedding settings.
embedding:
  enabled: false
  provider: 'langchain-huggingface'
  model_name: 'intfloat/e5-large-v2'

# Destination connector settings.
# NOTE(review): presumably `type` selects which of the sibling connector
# sections (chroma / qdrant) is used; verify against the consumer.
destination_connectors:
  enabled: false
  type: 'chroma'
  batch_size: 80

  chroma:
    host: 'localhost'
    port: 8004
    collection_name: 'snconf'
    tenant: 'default_tenant'
    database: 'default_database'

  qdrant:
    location: 'http://localhost:6333'
    collection_name: 'test'

# Additional post-processing settings.
additional_processing:
  enabled: true
  extend_metadata: true
  replace_table_text: true
  table_text_key: 'text_as_html'
  return_langchain_docs: true
  convert_metadata_keys_to_string: true