Prisma now supports extending the Prisma client instance, so you can create this `upsertMany` extension. (This extension was built to do `INSERT ... ON CONFLICT` upserts on a PostgreSQL database; the columns you pass as conflict paths must be covered by an explicit unique constraint on your table.)
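For example, a hypothetical `Entity` model with a composite unique key (matching the `conflictPaths: ['key1', 'key2']` used in the batch example further down) satisfies that requirement:

```prisma
model Entity {
  id        String   @id @default(uuid())
  key1      String
  key2      String
  value     String?
  updatedAt DateTime @updatedAt

  // The conflictPaths passed to upsertMany must map to a unique
  // constraint like this one, or ON CONFLICT will fail.
  @@unique([key1, key2])
}
```

The extension itself: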
```ts
import { Prisma } from '@prisma/client';

// `prismaInstance` is your base PrismaClient; `dateProvider` and
// `helperProvider` are the author's own utilities (date formatting,
// UUID detection, array helpers).
export const prisma = prismaInstance.$extends({
  model: {
    $allModels: {
      async upsertMany<T>(
        this: T,
        {
          data,
          conflictPaths,
        }: {
          data: Prisma.Args<T, 'createMany'>['data'];
          conflictPaths: (keyof Prisma.Args<T, 'create'>['data'])[];
        },
      ): Promise<number> {
        data = Array.isArray(data) ? data : [data];
        if (data.length === 0) {
          return 0;
        }
        // Validate that every conflict path is present in every record.
        for (const conflictField of conflictPaths) {
          const conflictKey = String(conflictField);
          const someUndefined = data.some(
            (record: any) => record[conflictKey] === undefined,
          );
          if (someUndefined) {
            throw new Error(
              `Conflict path "${conflictKey}" is missing in some provided data entries. ` +
                `Ensure this field is included in the insert data to match the ON CONFLICT clause.`,
            );
          }
        }
        const context = Prisma.getExtensionContext(this);
        const model = Prisma.dmmf.datamodel.models.find(
          (model) => model.name === context.$name,
        );
        if (!model) {
          throw new Error(`Model ${context.$name} not found in the DMMF`);
        }
        const tableName = model.dbName || model.name;
        const tableArg = Prisma.raw(`"${tableName}"`);
        const writeableFields = model.fields.filter(
          (field) =>
            field.name !== 'createdAt' &&
            !field.relationName &&
            !field.isGenerated,
        );
        // Introspect the concrete Postgres column types for this table.
        const typeRecords = await (context.$parent as any).$queryRaw`
          SELECT json_agg(json_build_object(
            'name', column_name,
            'type',
            CASE
              WHEN data_type = 'USER-DEFINED' THEN udt_name
              WHEN domain_name IS NOT NULL THEN domain_name
              WHEN data_type = 'character' THEN 'char(' || character_maximum_length || ')'
              WHEN data_type = 'character varying' THEN 'varchar(' || character_maximum_length || ')'
              WHEN data_type = 'numeric' THEN 'numeric(' || numeric_precision || ',' || numeric_scale || ')'
              ELSE data_type
            END
          ))
          FROM information_schema.columns
          WHERE table_name = ${tableName}`;
        if (typeRecords.length !== 1 || typeRecords[0].json_agg === undefined) {
          throw new Error(
            `Unable to introspect the types in table ${tableName}`,
          );
        }
        const types = new Map<string, string>(
          typeRecords[0].json_agg.map((x: { name: string; type: string }) => [
            x.name,
            x.type,
          ]),
        );
        for (const field of writeableFields) {
          if (!types.has(field.name)) {
            throw new Error(
              `Unable to determine the type of ${field.name} in table ${tableName}`,
            );
          }
        }
        // Build one SQL literal per record for each writeable column.
        // NOTE: values are interpolated as raw SQL, so only pass trusted data.
        const allColumnData = writeableFields.map((field) => ({
          name: field.name,
          data: (data as any[]).map((d) => {
            // Primary key: let Postgres generate the UUID.
            if (field.isId) {
              return `gen_random_uuid()`;
            }
            // @updatedAt field: generate a timestamp when none is provided.
            if (field.isUpdatedAt) {
              if (d[field.name] === null || d[field.name] === undefined) {
                return `TO_TIMESTAMP('${dateProvider.dateTimeToString(dateProvider.dateNow())}', 'YYYY-MM-DD HH24:MI:SS')`;
              }
              return `TO_TIMESTAMP('${dateProvider.dateTimeToString(dateProvider.dateNow(d[field.name]))}', 'YYYY-MM-DD HH24:MI:SS')`;
            }
            if (d[field.name]) {
              if (field.type === 'DateTime') {
                const isoString = new Date(d[field.name]).toISOString();
                // Midnight UTC is treated as a plain date; anything else as a timestamp.
                if (isoString.split('T')[1] === '00:00:00.000Z') {
                  return `TO_DATE('${isoString.split('T')[0]}', 'YYYY-MM-DD')`;
                }
                const cleanDate = isoString.replace('T', ' ').split('.')[0];
                return `TO_TIMESTAMP('${cleanDate}', 'YYYY-MM-DD HH24:MI:SS')`;
              }
              if (field.kind === 'enum') {
                // Cast the literal to its Postgres enum type.
                return `'${d[field.name]}'::"${types.get(field.name)}"`;
              }
              if (field.type === 'String') {
                if (helperProvider.isUuid(d[field.name])) {
                  return `UUID('${d[field.name]}')`;
                }
                return `'${d[field.name]}'`;
              }
            }
            return d[field.name] === null ||
              d[field.name] === undefined ||
              d[field.name] === ''
              ? `NULL`
              : d[field.name];
          }),
          type: types.get(field.name)!,
          onlyUndefined: (data as any[])
            .map((d) =>
              field.isUpdatedAt || field.isId
                ? false
                : d[field.name] === undefined,
            )
            .reduce((a, b) => a && b),
        }));
        // Skip columns that were never provided in any record.
        const relevantColumnData = allColumnData.filter(
          (x) =>
            !x.onlyUndefined &&
            x.data.some((d) => d !== null && d !== undefined),
        );
        const unnestStatements = relevantColumnData.map((x) => {
          return Prisma.sql`
            unnest(
              ${Prisma.raw(`ARRAY[${x.data.join(',')}]`)}
            ) AS "${Prisma.raw(x.name)}"
          `;
        });
        const columns = relevantColumnData.map((d) => d.name);
        const idExists = columns.find((c) => c === 'id');
        if (!idExists) {
          columns.unshift('id');
        }
        const columnsArg = Prisma.raw(columns.map((c) => `"${c}"`).join(','));
        const conflictArg = Prisma.raw(
          conflictPaths
            .map((c: string | symbol | number) => `"${c as any}"`)
            .join(','),
        );
        // Update every inserted column except the conflict keys and columns
        // that contain NULLs (so existing values are not wiped out).
        const updateColumns = columns.filter(
          (c) =>
            !conflictPaths.includes(c) &&
            !relevantColumnData.some(
              (x) => x.name === c && x.data.some((d) => d === null),
            ),
        );
        const updateArg = Prisma.raw(
          updateColumns.map((c) => `"${c}" = EXCLUDED."${c}"`).join(','),
        );
        const result = await (context.$parent as any).$executeRaw`
          INSERT INTO ${tableArg} (${columnsArg})
          SELECT ${Prisma.join(unnestStatements)}
          ON CONFLICT (${conflictArg}) DO UPDATE SET
          ${updateArg};`;
        return result;
      },
    },
  },
});
```
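To make the mechanics concrete: for two records of the hypothetical `Entity` model above, the extension builds a single statement shaped roughly like this (illustrative values, not actual output):

```sql
INSERT INTO "Entity" ("id","key1","key2","value","updatedAt")
SELECT
  unnest(ARRAY[gen_random_uuid(),gen_random_uuid()]) AS "id",
  unnest(ARRAY['a','b']) AS "key1",
  unnest(ARRAY['x','y']) AS "key2",
  unnest(ARRAY['v1','v2']) AS "value",
  unnest(ARRAY[TO_TIMESTAMP('2024-01-01 12:00:00', 'YYYY-MM-DD HH24:MI:SS'),
               TO_TIMESTAMP('2024-01-01 12:00:00', 'YYYY-MM-DD HH24:MI:SS')]) AS "updatedAt"
ON CONFLICT ("key1","key2") DO UPDATE SET
  "id" = EXCLUDED."id",
  "value" = EXCLUDED."value",
  "updatedAt" = EXCLUDED."updatedAt";
```

Everything is sent as one INSERT, which is why the memory footprint and round-trip count drop compared to issuing one upsert per record.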
The real advantage of `upsertMany` is doing many inserts/updates without overloading application memory. Using Prisma's built-in `upsert` for a list of 4,000 records in my AWS Lambda function was costing 2 GB of memory; with the `upsertMany` extension the same workload costs about 180 MB and runs faster. Here is an example applying this code with a batch strategy (the helpers it uses are sketched after the example):
```ts
const chunkDataSize = this.getChunkDataSize.execute(data.length);
const chunkData = this.helperProvider.chunkArray(data, chunkDataSize);
await this.prismaService.getClient().$transaction(
  async (transaction) => {
    for (const chunk of chunkData) {
      await transaction.entity.upsertMany({
        data: chunk,
        conflictPaths: ['key1', 'key2'],
      });
    }
  },
  {
    maxWait: 5000, // increase according to your dataset size
    timeout: 10000, // increase according to your dataset size
  },
);
```
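`getChunkDataSize` and `helperProvider.chunkArray` above are the author's own utilities; a minimal sketch of what they might look like (hypothetical names and signatures):

```ts
// Hypothetical helpers, shown only to make the batch example self-contained.
// Cap the chunk size so each generated INSERT statement stays manageable.
function getChunkDataSize(totalRecords: number, maxChunk = 500): number {
  return Math.min(totalRecords, maxChunk);
}

// Split an array into consecutive chunks of at most `chunkSize` items.
function chunkArray<T>(items: T[], chunkSize: number): T[][] {
  const chunks: T[][] = [];
  for (let i = 0; i < items.length; i += chunkSize) {
    chunks.push(items.slice(i, i + chunkSize));
  }
  return chunks;
}
```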
More discussion about this: <https://github.com/prisma/prisma/issues/4134>