Roe Datasets
Upload File
Upload a file to a dataset.
POST /v1/datasets/files/upload
import json
import requests
import os
# ---------------------------------------------------------------------------
# Request settings -- swap every placeholder below for a real value.
# ---------------------------------------------------------------------------
your_api_key = "YOUR_API_KEY"  # API key, sent as a Bearer token

# Optional form fields: assign None to leave a field out of the request.
dataset_id = "YOUR_DATASET_ID"  # dataset id, or None
organization_id = "YOUR_ORGANIZATION_ID"  # organization id, or None
metadata = {"your_key": "your_string_value"}  # string-to-string pairs, or None

file_path = "path/to/your/document.pdf"  # local path of the file to upload

# Upload endpoint and the authorization header that accompanies the request.
url = "https://api.roe-ai.com/v1/datasets/files/upload/"
headers = {"Authorization": f"Bearer {your_api_key}"}
def get_content_type(file_path):
    """Return the MIME content type for *file_path*, judged by its extension.

    The standard-library ``mimetypes`` registry is consulted first. When it
    has no entry for the extension, a small built-in table of common document
    and image extensions is tried, and ``"application/octet-stream"`` is
    returned as the last resort.

    Args:
        file_path: Path of the file, as a ``str`` or an ``os.PathLike``
            object. Only the name is inspected; the file is never opened.

    Returns:
        A non-empty MIME type string.
    """
    import mimetypes

    # Accept pathlib.Path and other path-like objects as well as plain strings.
    path_str = os.fspath(file_path)

    # guess_type() initialises the registry lazily on first use. Calling
    # mimetypes.init() on every lookup would rebuild the registry each time
    # and discard any types registered at runtime with mimetypes.add_type().
    content_type, _ = mimetypes.guess_type(path_str)
    if content_type:
        return content_type

    # Common extensions that might not be in the mimetypes registry.
    ext_map = {
        '.pdf': 'application/pdf',
        '.txt': 'text/plain',
        '.csv': 'text/csv',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    }

    # Lower-case before splitting so the table lookup is case-insensitive.
    _, ext = os.path.splitext(path_str.lower())

    # Unknown extensions fall back to the generic binary type.
    return ext_map.get(ext, "application/octet-stream")
# Build the multipart form fields; optional fields are sent only when set.
payload = {}
if dataset_id:
    payload["dataset_id"] = dataset_id
if organization_id:
    payload["organization_id"] = organization_id
if metadata:
    # Metadata is sent as a JSON-encoded string rather than a nested object.
    payload["metadata"] = json.dumps(metadata)

# get_content_type() always returns a usable MIME type (it falls back to
# "application/octet-stream"), so no further placeholder default is needed.
content_type = get_content_type(file_path)

# The request is issued inside the `with` block because requests reads the
# file contents from this handle while preparing the multipart body.
with open(file_path, "rb") as file_obj:
    files = {
        "file": (os.path.basename(file_path), file_obj, content_type)
    }
    response = requests.post(
        url,
        headers=headers,
        data=payload,
        files=files,
        # (connect, read) timeouts in seconds. Without a timeout requests can
        # wait forever on an unresponsive server; adjust for very large files.
        timeout=(10, 300),
    )

# Report the outcome: parsed JSON on success, status and raw body otherwise.
if response.status_code == 200:
    print(response.json())
else:
    print(f"Error: {response.status_code}")
    print(response.text)
{
"id": "123e4567-e89b-12d3-a456-426614174000",
"dataset": {
"id": "123e4567-e89b-12d3-a456-426614174001",
"name": "default",
"creator": {
"id": 1,
"email": "test@roe-ai.com",
"first_name": "Test",
"last_name": "Roe",
"is_active": true
},
"created_at": "2024-04-29T14:11:16.688751-07:00"
},
"name": "test1.txt",
"size": 0,
"creator": {
"id": 1,
"email": "test@roe-ai.com",
"first_name": "Test",
"last_name": "Roe",
"is_active": true
},
"created_at": "2024-07-01T16:15:51.964998-07:00"
}
Authorizations
Bearer authentication header of the form `Bearer <token>`, where `<token>` is your auth token.
Body
multipart/form-data
file: File to upload. Send as multipart form data.
dataset_id: (Optional) ID of the dataset to upload the file to. If not provided, the file is uploaded to the default dataset.
Minimum length:
1
organization_id: (Optional) ID of the organization to upload the file to. Can be omitted from the payload.
metadata: (Optional) Metadata for the file, passed as a JSON-formatted STRING. Keys and values must ONLY be strings.
Response
200
application/json
File uploaded
Maximum length:
1024
Maximum length:
254
Maximum length:
150
Maximum length:
150
Designates whether this user should be treated as active. Unselect this instead of deleting accounts.
Maximum length:
1024
Required range:
-9223372036854776000 <= x <= 9223372036854776000
import json
import requests
import os
# ---------------------------------------------------------------------------
# Request settings -- swap every placeholder below for a real value.
# ---------------------------------------------------------------------------
your_api_key = "YOUR_API_KEY"  # API key, sent as a Bearer token

# Optional form fields: assign None to leave a field out of the request.
dataset_id = "YOUR_DATASET_ID"  # dataset id, or None
organization_id = "YOUR_ORGANIZATION_ID"  # organization id, or None
metadata = {"your_key": "your_string_value"}  # string-to-string pairs, or None

file_path = "path/to/your/document.pdf"  # local path of the file to upload

# Upload endpoint and the authorization header that accompanies the request.
url = "https://api.roe-ai.com/v1/datasets/files/upload/"
headers = {"Authorization": f"Bearer {your_api_key}"}
def get_content_type(file_path):
    """Return the MIME content type for *file_path*, judged by its extension.

    The standard-library ``mimetypes`` registry is consulted first. When it
    has no entry for the extension, a small built-in table of common document
    and image extensions is tried, and ``"application/octet-stream"`` is
    returned as the last resort.

    Args:
        file_path: Path of the file, as a ``str`` or an ``os.PathLike``
            object. Only the name is inspected; the file is never opened.

    Returns:
        A non-empty MIME type string.
    """
    import mimetypes

    # Accept pathlib.Path and other path-like objects as well as plain strings.
    path_str = os.fspath(file_path)

    # guess_type() initialises the registry lazily on first use. Calling
    # mimetypes.init() on every lookup would rebuild the registry each time
    # and discard any types registered at runtime with mimetypes.add_type().
    content_type, _ = mimetypes.guess_type(path_str)
    if content_type:
        return content_type

    # Common extensions that might not be in the mimetypes registry.
    ext_map = {
        '.pdf': 'application/pdf',
        '.txt': 'text/plain',
        '.csv': 'text/csv',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    }

    # Lower-case before splitting so the table lookup is case-insensitive.
    _, ext = os.path.splitext(path_str.lower())

    # Unknown extensions fall back to the generic binary type.
    return ext_map.get(ext, "application/octet-stream")
# Build the multipart form fields; optional fields are sent only when set.
payload = {}
if dataset_id:
    payload["dataset_id"] = dataset_id
if organization_id:
    payload["organization_id"] = organization_id
if metadata:
    # Metadata is sent as a JSON-encoded string rather than a nested object.
    payload["metadata"] = json.dumps(metadata)

# get_content_type() always returns a usable MIME type (it falls back to
# "application/octet-stream"), so no further placeholder default is needed.
content_type = get_content_type(file_path)

# The request is issued inside the `with` block because requests reads the
# file contents from this handle while preparing the multipart body.
with open(file_path, "rb") as file_obj:
    files = {
        "file": (os.path.basename(file_path), file_obj, content_type)
    }
    response = requests.post(
        url,
        headers=headers,
        data=payload,
        files=files,
        # (connect, read) timeouts in seconds. Without a timeout requests can
        # wait forever on an unresponsive server; adjust for very large files.
        timeout=(10, 300),
    )

# Report the outcome: parsed JSON on success, status and raw body otherwise.
if response.status_code == 200:
    print(response.json())
else:
    print(f"Error: {response.status_code}")
    print(response.text)
{
"id": "123e4567-e89b-12d3-a456-426614174000",
"dataset": {
"id": "123e4567-e89b-12d3-a456-426614174001",
"name": "default",
"creator": {
"id": 1,
"email": "test@roe-ai.com",
"first_name": "Test",
"last_name": "Roe",
"is_active": true
},
"created_at": "2024-04-29T14:11:16.688751-07:00"
},
"name": "test1.txt",
"size": 0,
"creator": {
"id": 1,
"email": "test@roe-ai.com",
"first_name": "Test",
"last_name": "Roe",
"is_active": true
},
"created_at": "2024-07-01T16:15:51.964998-07:00"
}