# petrojm's picture
# add EKR files
# a6c26b1
# raw
# history blame
# 1.63 kB
# General processor options: logging verbosity, output location,
# process count, and whether to reprocess.
processor:
  verbose: true
  output_dir: './output'
  num_processes: 2
  reprocess: false
# Per-source settings. Every token, address, URL and ID below is a
# placeholder; replace before use and keep real secrets out of version control.
sources:
  local:
    recursive: true

  confluence:
    api_token: 'your_confluence_api_token'
    # Placeholder address; replace before use.
    user_email: 'your_email@example.com'
    url: 'https://your-confluence-url.atlassian.net'

  github:
    # NOTE(review): looks like an 'owner/repo' slug rather than a full URL — confirm.
    url: 'owner/repo'
    branch: 'main'

  google_drive:
    service_account_key: 'path/to/service_account_key.json'
    recursive: true
    drive_id: 'your_drive_id'
# Partitioning options, including the endpoint/port/key used when
# partitioning is done through an API server.
partitioning:
  skip_infer_table_types: []
  strategy: 'auto'
  hi_res_model_name: 'yolox'
  ocr_languages: ['eng']
  encoding: 'utf-8'
  fields_include: ['element_id', 'text', 'type', 'metadata', 'embeddings']
  flatten_metadata: false
  metadata_exclude: []
  metadata_include: []
  partition_endpoint: 'http://localhost'
  unstructured_port: 8005
  # Set to true if using the API server.
  partition_by_api: false
  # Placeholder key. Quoted so the value always loads as a string, even if
  # it is replaced by an all-digit key.
  default_unstructured_api_key: '123456789abcde'
# Chunking options: on/off switch, strategy name, and character limits.
chunking:
  enabled: true
  strategy: 'by_title'
  chunk_max_characters: 1500
  chunk_overlap: 300
  combine_under_n_chars: 1500
# Embedding options: on/off switch, provider, and model name.
embedding:
  enabled: false
  provider: 'langchain-huggingface'
  model_name: 'intfloat/e5-large-v2'
# Destination connector options. `type` names the connector; the `chroma`
# and `qdrant` maps hold connector-specific settings.
destination_connectors:
  enabled: false
  type: 'chroma'
  batch_size: 80

  chroma:
    host: 'localhost'
    port: 8004
    collection_name: 'snconf'
    tenant: 'default_tenant'
    database: 'default_database'

  qdrant:
    location: 'http://localhost:6333'
    collection_name: 'test'
# Additional processing switches: metadata handling, table-text
# replacement, and the returned document format.
additional_processing:
  enabled: true
  extend_metadata: true
  replace_table_text: true
  # Key used for table text when replace_table_text is on — presumably a
  # metadata field name; verify against the consumer.
  table_text_key: 'text_as_html'
  return_langchain_docs: true
  convert_metadata_keys_to_string: true