Here is a quick Python script that converts a CSV file of question/answer pairs (question in the first column, answer in the second) into a proper JSONL file that can be used to train ChatGPT models.
import csv
import json
# Default input/output locations used when the file is run as a script.
csv_file = 'data.csv'
jsonl_file = 'train.jsonl'

# System message placed at the start of every generated conversation.
SYSTEM_PROMPT = "Claude is a helpful assistant"


def convert_csv_to_jsonl(csv_path, jsonl_path, system_prompt=SYSTEM_PROMPT,
                         encoding='utf-8-sig'):
    """Convert a question/answer CSV file into a chat-format JSONL file.

    Each non-blank CSV row becomes one JSON object on its own line, holding
    a ``"messages"`` list with a system, a user and an assistant message.
    The first column is used as the user question and the second column as
    the assistant answer; any further columns are ignored.

    Args:
        csv_path: Path of the CSV file to read.
        jsonl_path: Path of the JSONL file to create (overwritten if it
            already exists).
        system_prompt: Content of the system message in every record.
        encoding: Text encoding of the CSV file. The default
            ``'utf-8-sig'`` reads plain UTF-8 and also drops a leading
            byte-order mark, so the BOM does not leak into the first
            question.

    Returns:
        The number of records written to ``jsonl_path``.

    Raises:
        IndexError: If a non-blank row has fewer than two columns.
        OSError: If either file cannot be opened.
    """
    written = 0
    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields that contain embedded newlines.
    with open(csv_path, newline='', encoding=encoding) as csvfile:
        with open(jsonl_path, 'w', encoding='utf-8') as jsonfile:
            for row in csv.reader(csvfile):
                if not row:
                    # csv.reader yields an empty list for blank lines
                    # (e.g. a trailing empty line); there is nothing to
                    # convert, so skip it instead of failing on row[0].
                    continue
                question, answer = row[0], row[1]
                convo = {
                    "messages": [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": question},
                        {"role": "assistant", "content": answer},
                    ]
                }
                # One JSON document per line is what makes the file JSONL.
                json.dump(convo, jsonfile)
                jsonfile.write('\n')
                written += 1
    return written


if __name__ == '__main__':
    convert_csv_to_jsonl(csv_file, jsonl_file)
    print('CSV converted to JSONL successfully!')