Split dataset

# Split the project's dataset into training / testing / validation sets.
# Replace {projectId} and <api-key> with your own values before running.
# The split ratios are proportions of the dataset (80% / 10% / 10% here);
# "validationSplitRatio" is optional.
curl --request POST \
  --url https://studio.edgeimpulse.com/v1/api/{projectId}/split \
  --header 'Content-Type: application/json' \
  --header 'x-api-key: <api-key>' \
  --data '
{
  "trainingSplitRatio": 0.8,
  "testingSplitRatio": 0.1,
  "validationSplitRatio": 0.1,
  "excludeDisabledSamples": false,
  "stratifyBy": {
    "label": true,
    "metadataKeys": [
      "<string>"
    ]
  },
  "keepTogetherMetadataKeys": [
    "<string>"
  ]
}
'

import requests

# Split-dataset endpoint; replace {projectId} with your numeric project ID.
url = "https://studio.edgeimpulse.com/v1/api/{projectId}/split"

# Split ratios are proportions of the dataset (80% / 10% / 10% here);
# "validationSplitRatio" is optional.
payload = {
    "trainingSplitRatio": 0.8,
    "testingSplitRatio": 0.1,
    "validationSplitRatio": 0.1,
    "excludeDisabledSamples": False,
    "stratifyBy": {
        "label": True,
        "metadataKeys": ["<string>"]
    },
    "keepTogetherMetadataKeys": ["<string>"]
}
headers = {
    "x-api-key": "<api-key>",
    "Content-Type": "application/json"
}

# requests has no default timeout; bound the wait so an unreachable server
# cannot hang the script indefinitely.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw JSON body ("success" flag plus optional "error" message).
print(response.text)

// Request options for the split-dataset call. Replace <api-key> with your key.
// Split ratios are proportions of the dataset (80% / 10% / 10% here);
// validationSplitRatio is optional.
const options = {
  method: 'POST',
  headers: {'x-api-key': '<api-key>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    trainingSplitRatio: 0.8,
    testingSplitRatio: 0.1,
    validationSplitRatio: 0.1,
    excludeDisabledSamples: false,
    stratifyBy: {label: true, metadataKeys: ['<string>']},
    keepTogetherMetadataKeys: ['<string>']
  })
};

// Replace {projectId} with your numeric project ID. The parsed JSON response
// is logged; network or parse failures are logged by the catch handler.
fetch('https://studio.edgeimpulse.com/v1/api/{projectId}/split', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

// Split the project's dataset via the Studio API.
// Replace {projectId} and <api-key> with your own values.
$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://studio.edgeimpulse.com/v1/api/{projectId}/split",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  // Split ratios are proportions of the dataset (80% / 10% / 10% here);
  // 'validationSplitRatio' is optional.
  CURLOPT_POSTFIELDS => json_encode([
    'trainingSplitRatio' => 0.8,
    'testingSplitRatio' => 0.1,
    'validationSplitRatio' => 0.1,
    'excludeDisabledSamples' => false,
    'stratifyBy' => [
        'label' => true,
        'metadataKeys' => [
            '<string>'
        ]
    ],
    'keepTogetherMetadataKeys' => [
        '<string>'
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Content-Type: application/json",
    "x-api-key: <api-key>"
  ],
]);

$response = curl_exec($curl);
// Capture the transport error (if any) before the handle is closed.
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
	"strings"
)

// main sends the split-dataset request and prints the raw response body,
// exiting with a non-zero status if any step fails.
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run builds and sends the POST request, then prints the response body.
// It returns the first error encountered instead of ignoring it.
func run() error {
	// Replace {projectId} with your numeric project ID.
	url := "https://studio.edgeimpulse.com/v1/api/{projectId}/split"

	// Split ratios are proportions of the dataset (80% / 10% / 10% here);
	// "validationSplitRatio" is optional.
	payload := strings.NewReader(`{
  "trainingSplitRatio": 0.8,
  "testingSplitRatio": 0.1,
  "validationSplitRatio": 0.1,
  "excludeDisabledSamples": false,
  "stratifyBy": {
    "label": true,
    "metadataKeys": [
      "<string>"
    ]
  },
  "keepTogetherMetadataKeys": [
    "<string>"
  ]
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return fmt.Errorf("building request: %w", err)
	}

	req.Header.Add("x-api-key", "<api-key>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so return before touching res.Body.
		return fmt.Errorf("sending request: %w", err)
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		return fmt.Errorf("reading response: %w", err)
	}

	fmt.Println(string(body))
	return nil
}

// Split the project's dataset. Replace {projectId} and <api-key> with your own values.
// Split ratios are proportions of the dataset (80% / 10% / 10% here);
// "validationSplitRatio" is optional.
HttpResponse<String> response = Unirest.post("https://studio.edgeimpulse.com/v1/api/{projectId}/split")
  .header("x-api-key", "<api-key>")
  .header("Content-Type", "application/json")
  .body("{\n  \"trainingSplitRatio\": 0.8,\n  \"testingSplitRatio\": 0.1,\n  \"validationSplitRatio\": 0.1,\n  \"excludeDisabledSamples\": false,\n  \"stratifyBy\": {\n    \"label\": true,\n    \"metadataKeys\": [\n      \"<string>\"\n    ]\n  },\n  \"keepTogetherMetadataKeys\": [\n    \"<string>\"\n  ]\n}")
  .asString();

require 'uri'
require 'net/http'
require 'json'

# Split-dataset endpoint; replace {projectId} with your numeric project ID.
url = URI("https://studio.edgeimpulse.com/v1/api/{projectId}/split")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["x-api-key"] = '<api-key>'
request["Content-Type"] = 'application/json'
# Serialize the body with the JSON library instead of a hand-escaped string.
# Split ratios are proportions of the dataset (80% / 10% / 10% here);
# "validationSplitRatio" is optional.
request.body = JSON.generate({
  "trainingSplitRatio" => 0.8,
  "testingSplitRatio" => 0.1,
  "validationSplitRatio" => 0.1,
  "excludeDisabledSamples" => false,
  "stratifyBy" => {
    "label" => true,
    "metadataKeys" => ["<string>"]
  },
  "keepTogetherMetadataKeys" => ["<string>"]
})

response = http.request(request)
# Print the raw JSON body returned by the API.
puts response.read_body

{
  "success": true,
  "error": "<string>"
}

Split dataset

Performs a deterministic, in-place split of the project's dataset into "training", "testing", and optional "validation" sets. Split balancing can use the label, one or more metadata keys, or both as a composite grouping signal. Related samples can also be kept together in the same split by metadata key. The split is deterministic because it is based on the hash of the name of the data. Returns immediately on small datasets, or starts a job on larger datasets.

For example:

{
  "trainingSplitRatio": 0.8,
  "testingSplitRatio": 0.1,
  "validationSplitRatio": 0.1,
  "excludeDisabledSamples": false,
  "stratifyBy": {
    "label": true,
    "metadataKeys": ["site", "scanner"]
  },
  "keepTogetherMetadataKeys": ["capture_group"]
}

With these options, label/site/scanner are used to balance the split, while samples sharing the same capture_group value stay in the same split bucket.

POST /api/{projectId}/split

Split dataset

# Split the project's dataset into training / testing / validation sets.
# Replace {projectId} and <api-key> with your own values before running.
# The split ratios are proportions of the dataset (80% / 10% / 10% here);
# "validationSplitRatio" is optional.
curl --request POST \
  --url https://studio.edgeimpulse.com/v1/api/{projectId}/split \
  --header 'Content-Type: application/json' \
  --header 'x-api-key: <api-key>' \
  --data '
{
  "trainingSplitRatio": 0.8,
  "testingSplitRatio": 0.1,
  "validationSplitRatio": 0.1,
  "excludeDisabledSamples": false,
  "stratifyBy": {
    "label": true,
    "metadataKeys": [
      "<string>"
    ]
  },
  "keepTogetherMetadataKeys": [
    "<string>"
  ]
}
'

import requests

# Split-dataset endpoint; replace {projectId} with your numeric project ID.
url = "https://studio.edgeimpulse.com/v1/api/{projectId}/split"

# Split ratios are proportions of the dataset (80% / 10% / 10% here);
# "validationSplitRatio" is optional.
payload = {
    "trainingSplitRatio": 0.8,
    "testingSplitRatio": 0.1,
    "validationSplitRatio": 0.1,
    "excludeDisabledSamples": False,
    "stratifyBy": {
        "label": True,
        "metadataKeys": ["<string>"]
    },
    "keepTogetherMetadataKeys": ["<string>"]
}
headers = {
    "x-api-key": "<api-key>",
    "Content-Type": "application/json"
}

# requests has no default timeout; bound the wait so an unreachable server
# cannot hang the script indefinitely.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw JSON body ("success" flag plus optional "error" message).
print(response.text)

// Request options for the split-dataset call. Replace <api-key> with your key.
// Split ratios are proportions of the dataset (80% / 10% / 10% here);
// validationSplitRatio is optional.
const options = {
  method: 'POST',
  headers: {'x-api-key': '<api-key>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    trainingSplitRatio: 0.8,
    testingSplitRatio: 0.1,
    validationSplitRatio: 0.1,
    excludeDisabledSamples: false,
    stratifyBy: {label: true, metadataKeys: ['<string>']},
    keepTogetherMetadataKeys: ['<string>']
  })
};

// Replace {projectId} with your numeric project ID. The parsed JSON response
// is logged; network or parse failures are logged by the catch handler.
fetch('https://studio.edgeimpulse.com/v1/api/{projectId}/split', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

// Split the project's dataset via the Studio API.
// Replace {projectId} and <api-key> with your own values.
$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://studio.edgeimpulse.com/v1/api/{projectId}/split",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  // Split ratios are proportions of the dataset (80% / 10% / 10% here);
  // 'validationSplitRatio' is optional.
  CURLOPT_POSTFIELDS => json_encode([
    'trainingSplitRatio' => 0.8,
    'testingSplitRatio' => 0.1,
    'validationSplitRatio' => 0.1,
    'excludeDisabledSamples' => false,
    'stratifyBy' => [
        'label' => true,
        'metadataKeys' => [
            '<string>'
        ]
    ],
    'keepTogetherMetadataKeys' => [
        '<string>'
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Content-Type: application/json",
    "x-api-key: <api-key>"
  ],
]);

$response = curl_exec($curl);
// Capture the transport error (if any) before the handle is closed.
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
	"strings"
)

// main sends the split-dataset request and prints the raw response body,
// exiting with a non-zero status if any step fails.
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run builds and sends the POST request, then prints the response body.
// It returns the first error encountered instead of ignoring it.
func run() error {
	// Replace {projectId} with your numeric project ID.
	url := "https://studio.edgeimpulse.com/v1/api/{projectId}/split"

	// Split ratios are proportions of the dataset (80% / 10% / 10% here);
	// "validationSplitRatio" is optional.
	payload := strings.NewReader(`{
  "trainingSplitRatio": 0.8,
  "testingSplitRatio": 0.1,
  "validationSplitRatio": 0.1,
  "excludeDisabledSamples": false,
  "stratifyBy": {
    "label": true,
    "metadataKeys": [
      "<string>"
    ]
  },
  "keepTogetherMetadataKeys": [
    "<string>"
  ]
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return fmt.Errorf("building request: %w", err)
	}

	req.Header.Add("x-api-key", "<api-key>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so return before touching res.Body.
		return fmt.Errorf("sending request: %w", err)
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		return fmt.Errorf("reading response: %w", err)
	}

	fmt.Println(string(body))
	return nil
}

// Split the project's dataset. Replace {projectId} and <api-key> with your own values.
// Split ratios are proportions of the dataset (80% / 10% / 10% here);
// "validationSplitRatio" is optional.
HttpResponse<String> response = Unirest.post("https://studio.edgeimpulse.com/v1/api/{projectId}/split")
  .header("x-api-key", "<api-key>")
  .header("Content-Type", "application/json")
  .body("{\n  \"trainingSplitRatio\": 0.8,\n  \"testingSplitRatio\": 0.1,\n  \"validationSplitRatio\": 0.1,\n  \"excludeDisabledSamples\": false,\n  \"stratifyBy\": {\n    \"label\": true,\n    \"metadataKeys\": [\n      \"<string>\"\n    ]\n  },\n  \"keepTogetherMetadataKeys\": [\n    \"<string>\"\n  ]\n}")
  .asString();

require 'uri'
require 'net/http'
require 'json'

# Split-dataset endpoint; replace {projectId} with your numeric project ID.
url = URI("https://studio.edgeimpulse.com/v1/api/{projectId}/split")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["x-api-key"] = '<api-key>'
request["Content-Type"] = 'application/json'
# Serialize the body with the JSON library instead of a hand-escaped string.
# Split ratios are proportions of the dataset (80% / 10% / 10% here);
# "validationSplitRatio" is optional.
request.body = JSON.generate({
  "trainingSplitRatio" => 0.8,
  "testingSplitRatio" => 0.1,
  "validationSplitRatio" => 0.1,
  "excludeDisabledSamples" => false,
  "stratifyBy" => {
    "label" => true,
    "metadataKeys" => ["<string>"]
  },
  "keepTogetherMetadataKeys" => ["<string>"]
})

response = http.request(request)
# Print the raw JSON body returned by the API.
puts response.read_body

{
  "success": true,
  "error": "<string>"
}

Authorizations

x-api-key

string

header

required

Path Parameters

projectId

integer

required

Project ID

Body

application/json

trainingSplitRatio

number

required

Proportion of the dataset to use for training.

testingSplitRatio

number

required

Proportion of the dataset to use for testing.

validationSplitRatio

number

Proportion of the dataset to use for validation. This is experimental and may change in the future.

excludeDisabledSamples

boolean

default:false

Whether to exclude samples that are marked as disabled.

stratifyBy

object

Optional balancing targets for the split.

Show child attributes

keepTogetherMetadataKeys

string[]

List of metadata keys whose matching values must stay together in a single split. This is useful for leakage prevention across train, validation, and test.

Response

200 - application/json

Option 1
Option 2

success

boolean

required

Whether the operation succeeded

error

string

Optional error description (set if 'success' is false)

Rebalance dataset

Split dataset preview

OVERVIEW

STUDIO API ENDPOINTS

Split dataset

Authorizations

Path Parameters

Body

Response