# Viewer metadata (not part of the config): file size 1,632 bytes; revision a6c26b1.
# General run settings for the processor.
# NOTE(review): nesting was reconstructed from the key names — confirm
# against the schema the consuming code expects.
processor:
  verbose: true
  # Relative path; presumably resolved against the working directory — confirm.
  output_dir: './output'
  num_processes: 2
  reprocess: false
# Per-source connection settings, one sub-mapping per source type.
# Every quoted value below is a placeholder and must be replaced before use.
# Do not commit real tokens or key files; reference them from a secret store
# or environment instead.
# NOTE(review): nesting was reconstructed from the key names — confirm
# against the schema the consuming code expects.
sources:
  local:
    recursive: true

  confluence:
    api_token: 'your_confluence_api_token'
    # Placeholder address (the previous value was scraper-obfuscated text).
    user_email: 'your_email@example.com'
    url: 'https://your-confluence-url.atlassian.net'

  github:
    # Repository in 'owner/repo' form.
    url: 'owner/repo'
    branch: 'main'

  google_drive:
    service_account_key: 'path/to/service_account_key.json'
    recursive: true
    drive_id: 'your_drive_id'
# Document partitioning settings.
# NOTE(review): nesting was reconstructed from the key names — confirm
# against the schema the consuming code expects.
partitioning:
  skip_infer_table_types: []
  strategy: 'auto'
  hi_res_model_name: 'yolox'
  ocr_languages: ['eng']
  encoding: 'utf-8'
  fields_include: ['element_id', 'text', 'type', 'metadata', 'embeddings']
  flatten_metadata: false
  metadata_exclude: []
  metadata_include: []
  # Endpoint and port of the partitioning API server; presumably only read
  # when partition_by_api is true — confirm.
  partition_endpoint: 'http://localhost'
  unstructured_port: 8005
  partition_by_api: false  # set to true if using the API server
  # Quoted so the key stays a string even if replaced by an all-digit value.
  default_unstructured_api_key: '123456789abcde'
# Chunking settings applied to partitioned elements.
# NOTE(review): nesting was reconstructed from the key names — confirm
# against the schema the consuming code expects.
chunking:
  enabled: true
  strategy: 'by_title'
  # Sizes are presumably measured in characters — confirm for chunk_overlap.
  chunk_max_characters: 1500
  chunk_overlap: 300
  # Same value as chunk_max_characters.
  combine_under_n_chars: 1500
# Embedding settings; disabled in this configuration.
# NOTE(review): nesting was reconstructed from the key names — confirm
# against the schema the consuming code expects.
embedding:
  enabled: false
  provider: 'langchain-huggingface'
  model_name: 'intfloat/e5-large-v2'
# Destination (vector store) settings; disabled in this configuration.
# NOTE(review): nesting was reconstructed from the key names — confirm
# against the schema the consuming code expects.
destination_connectors:
  enabled: false
  # Presumably selects which sub-mapping below is used — confirm.
  type: 'chroma'
  batch_size: 80

  chroma:
    host: 'localhost'
    port: 8004
    collection_name: 'snconf'
    tenant: 'default_tenant'
    database: 'default_database'

  qdrant:
    location: 'http://localhost:6333'
    collection_name: 'test'
# Post-processing switches applied after partitioning.
# NOTE(review): nesting was reconstructed from the key names — confirm
# against the schema the consuming code expects.
additional_processing:
  enabled: true
  extend_metadata: true
  replace_table_text: true
  # Presumably the metadata key whose value replaces table text when
  # replace_table_text is true — confirm.
  table_text_key: 'text_as_html'
  return_langchain_docs: true
  convert_metadata_keys_to_string: true
# End of configuration.