POST /v1/datasets/files/upload
import json
import requests
import os

# --- Request configuration: replace each placeholder with your own value ---
your_api_key = "YOUR_API_KEY"  # API key sent as the Bearer token

# Optional settings. The trailing `or None` turns an empty replacement value
# into None, which the upload code further down treats as "not provided".
dataset_id = "YOUR_DATASET_ID" or None  # dataset id, or None
organization_id = "YOUR_ORGANIZATION_ID" or None  # organization id, or None
metadata = {"your_key": "your_string_value"} or None  # metadata dict, or None

file_path = "path/to/your/document.pdf"  # local path of the file to upload

# Endpoint and authentication header used for the upload request
url = "https://api.roe-ai.com/v1/datasets/files/upload/"
headers = {"Authorization": f"Bearer {your_api_key}"}

def get_content_type(file_path):
    """Return the MIME content type for ``file_path``, judged by its name.

    The standard ``mimetypes`` registry is consulted first. If it has no
    entry for the file's extension, a small built-in table of common document
    and image types is tried. ``"application/octet-stream"`` is returned when
    neither source knows the extension.

    Args:
        file_path: Path or file name as a string. The file is not opened.

    Returns:
        A non-empty content-type string.
    """
    import mimetypes

    # guess_type() initialises the registry lazily on first use. Calling
    # mimetypes.init() here would rebuild the registry on every call, which
    # re-reads the system type files and discards any types registered at
    # runtime through mimetypes.add_type().
    guessed_type, _ = mimetypes.guess_type(file_path)
    if guessed_type:
        return guessed_type

    # Common extensions that might not be in the mimetypes registry
    fallback_types = {
        '.pdf': 'application/pdf',
        '.txt': 'text/plain',
        '.csv': 'text/csv',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    }

    # Lower-case the path so the table lookup is case-insensitive
    _, ext = os.path.splitext(file_path.lower())
    return fallback_types.get(ext, "application/octet-stream")


# Prepare the form data. Optional fields are only sent when they are set, so
# the API applies its own defaults (e.g. the default dataset) otherwise.
payload = {}
if dataset_id:
    payload["dataset_id"] = dataset_id
if organization_id:
    payload["organization_id"] = organization_id
if metadata:
    # The API expects metadata as a JSON-encoded string, not a nested object
    payload["metadata"] = json.dumps(metadata)

# get_content_type() always returns a non-empty string (it falls back to
# "application/octet-stream"), so no further default is needed here.
content_type = get_content_type(file_path)

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# forever if the server stops responding. Raise the read value for very
# large files or slow connections.
request_timeout = (10, 120)

# Keep the file open for the duration of the request: requests reads from the
# file object while it sends the multipart body.
with open(file_path, "rb") as file_obj:
    files = {
        "file": (os.path.basename(file_path), file_obj, content_type)
    }

    # Make the API request
    response = requests.post(
        url,
        headers=headers,
        data=payload,
        files=files,
        timeout=request_timeout,
    )

# Process the response: 200 carries the uploaded file's JSON description
if response.status_code == 200:
    print(response.json())
else:
    print(f"Error: {response.status_code}")
    print(response.text)
    
{
  "id": "123e4567-e89b-12d3-a456-426614174000",
  "dataset": {
    "id": "123e4567-e89b-12d3-a456-426614174001",
    "name": "default",
    "creator": {
      "id": 1,
      "email": "test@roe-ai.com",
      "first_name": "Test",
      "last_name": "Roe",
      "is_active": true
    },
    "created_at": "2024-04-29T14:11:16.688751-07:00"
  },
  "name": "test1.txt",
  "size": 0,
  "creator": {
    "id": 1,
    "email": "test@roe-ai.com",
    "first_name": "Test",
    "last_name": "Roe",
    "is_active": true
  },
  "created_at": "2024-07-01T16:15:51.964998-07:00"
}

Authorizations

Authorization
string
header
required

Bearer authentication header of the form Bearer <token>, where <token> is your auth token.

Body

multipart/form-data
file
file
required

File to upload. Send it as multipart form data.

dataset_id
string

(Optional) Dataset ID to upload file to. If not provided, file will be uploaded to default dataset.

Minimum length: 1
organization_id
string

(Optional) ID of organization to upload file to. Can be omitted from payload.

metadata
string

(Optional) Metadata for the file, as a JSON object serialized to a STRING. Both keys and values must be strings.

Response

200
application/json
File uploaded
id
string
required
dataset
object
required
creator
object
required
created_at
string
required
name
string | null
Maximum length: 1024
size
integer | null
Required range: -9223372036854775808 <= x <= 9223372036854775807