File size: 1,380 Bytes

adf4b14
96b0f63
adf4b14
 
96b0f63
2bfc9d7
96b0f63
2bfc9d7
96b0f63
adf4b14
2bfc9d7
96b0f63
 
 
 
adf4b14
2bfc9d7
96b0f63
 
 
adf4b14
96b0f63
adf4b14
 
 
2bfc9d7
6d9772d
2bfc9d7
96b0f63
 
 
 
 
 
 
 
 
 
 
 
 
 
adf4b14

"""
# cognition
# https://huggingface.co/datasets/Tongjilibo/self_cognition

# instruction
https://huggingface.co/datasets/arcee-ai/The-Tome
https://huggingface.co/datasets/teknium/OpenHermes-2.5

# tool/function calling
https://huggingface.co/datasets/NousResearch/hermes-function-calling-v1

# math
https://huggingface.co/datasets/ai2-adapt-dev/openmath-2-math

# agent
https://huggingface.co/datasets/arcee-ai/agent-data

# role-play
# (no datasets listed yet)

# reflection
# (no datasets listed yet)

# reasoning
https://huggingface.co/datasets/KingNish/reasoning-base-20k
https://huggingface.co/datasets/Magpie-Align/Magpie-Reasoning-150K
https://huggingface.co/datasets/thesven/gsm8k-reasoning
"""

"""
# sft (supervised fine-tuning)
https://huggingface.co/datasets/HuggingFaceH4/no_robots
https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k
https://huggingface.co/datasets/HuggingFaceH4/deita-10k-v0-sft
https://huggingface.co/datasets/Open-Orca/slimorca-deduped-cleaned-corrected
https://huggingface.co/datasets/arcee-ai/EvolKit-20k
https://huggingface.co/datasets/ise-uiuc/Magicoder-Evol-Instruct-110K
https://huggingface.co/datasets/WizardLMTeam/WizardLM_evol_instruct_V2_196k
https://huggingface.co/datasets/ai2-adapt-dev/olmoe-commercial

# dpo (direct preference optimization)
https://huggingface.co/datasets/allenai/ultrafeedback_binarized_cleaned
https://huggingface.co/datasets/kyujinpy/orca_math_dpo
https://huggingface.co/datasets/argilla/OpenHermesPreferences
"""