Updated batch processes
This commit is contained in:
parent
8ea83f825e
commit
e83122945a
|
|
@ -88,7 +88,9 @@ async function createParagraphs(documentId, paragraphs, locale, type) {
|
||||||
//Format paragraph here. Check paragraphs and create a notag version of the text for better search results. We can also add a "raw" version of the text with all tags for display purposes.
|
//Format paragraph here. Check paragraphs and create a notag version of the text for better search results. We can also add a "raw" version of the text with all tags for display purposes.
|
||||||
let fixedHtml = para.trim();
|
let fixedHtml = para.trim();
|
||||||
|
|
||||||
if(!fixedHtml.startsWith('<p')){
|
if(para == ' ') continue; // Skip paragraphs that are just non-breaking spaces
|
||||||
|
|
||||||
|
if(!fixedHtml.startsWith('<p') ) {
|
||||||
// If the paragraph doesn't start with <p>, wrap it in <p> tags
|
// If the paragraph doesn't start with <p>, wrap it in <p> tags
|
||||||
fixedHtml = `<p>${fixedHtml}</p>`;
|
fixedHtml = `<p>${fixedHtml}</p>`;
|
||||||
}
|
}
|
||||||
|
|
@ -115,7 +117,7 @@ async function createParagraphs(documentId, paragraphs, locale, type) {
|
||||||
// Main function to create document in Typesense
|
// Main function to create document in Typesense
|
||||||
async function createDocument(data){
|
async function createDocument(data){
|
||||||
let document = {
|
let document = {
|
||||||
code: data.locale + '-' + dayjs.unix(data.timestamp).format('YYYYMMDD') + '-' + data.activity,
|
code: dayjs.unix(data.timestamp).format('YYYYMMDD') + '-' + data.activity,
|
||||||
locale: data.locale,
|
locale: data.locale,
|
||||||
id: data.id.toString(),
|
id: data.id.toString(),
|
||||||
type: data.type,
|
type: data.type,
|
||||||
|
|
@ -133,17 +135,19 @@ async function createDocument(data){
|
||||||
thumbnail: data.thumbnail,
|
thumbnail: data.thumbnail,
|
||||||
files: {
|
files: {
|
||||||
youtube: data.youtube,
|
youtube: data.youtube,
|
||||||
video: data.files?.videos?.file || null,
|
video: data.video || null,
|
||||||
audio: data.files?.audios?.[0]?.[0]?.file2 || null,
|
audio: data.audio || null,
|
||||||
booklet: data.files?.textos?.[0]?.[1]?.file2 || null,
|
booklet: data.booklet || null,
|
||||||
simple: data.files?.textos?.[0]?.[0]?.file2 || null,
|
simple: data.simple || null,
|
||||||
},
|
},
|
||||||
directus: "",
|
directus: "",
|
||||||
wp: data.id.toString(),
|
wp: data.id.toString(),
|
||||||
//rm: data.rm,
|
rm: data.rm,
|
||||||
private: false,
|
private: data.private || false,
|
||||||
slug: data.translations?.[0]?.interventions?.[0]?.slug || null,
|
slug: data.slug || null,
|
||||||
body: data.body || null
|
body: "",
|
||||||
|
year: data.year,
|
||||||
|
month: data.month,
|
||||||
};
|
};
|
||||||
|
|
||||||
let doc = await client.collections('documents').documents().upsert(document);
|
let doc = await client.collections('documents').documents().upsert(document);
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
//import fetch from 'node-fetch'; // For ES Modules
|
//import fetch from 'node-fetch'; // For ES Modules
|
||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
const path = require('path');
|
const path = require('path');
|
||||||
|
const { parse } = require('json2csv');
|
||||||
const striptags = require('striptags');
|
const striptags = require('striptags');
|
||||||
const dayjs = require('dayjs');
|
const dayjs = require('dayjs');
|
||||||
const he = require('he');
|
const he = require('he');
|
||||||
|
|
@ -18,7 +18,7 @@ let jsonlData = '';
|
||||||
const DATA_INPUT_FOLDER = './input';
|
const DATA_INPUT_FOLDER = './input';
|
||||||
const DATA_OUTPUT_FOLDER = './output';
|
const DATA_OUTPUT_FOLDER = './output';
|
||||||
const TEMPLATES_FOLDER = './templates';
|
const TEMPLATES_FOLDER = './templates';
|
||||||
const LOCALE = 'pt';
|
const LOCALE = 'es';
|
||||||
|
|
||||||
function generateMarkdown(type) {
|
function generateMarkdown(type) {
|
||||||
const folderPath = `${DATA_INPUT_FOLDER}/${type}`;
|
const folderPath = `${DATA_INPUT_FOLDER}/${type}`;
|
||||||
|
|
@ -95,6 +95,7 @@ async function generateJson( type ) {
|
||||||
let fields = [];
|
let fields = [];
|
||||||
fields['activities_translations'] = [
|
fields['activities_translations'] = [
|
||||||
'activities_id.id',
|
'activities_id.id',
|
||||||
|
'activities_id.wpid',
|
||||||
'title',
|
'title',
|
||||||
'activities_id.date',
|
'activities_id.date',
|
||||||
'activities_id.activity',
|
'activities_id.activity',
|
||||||
|
|
@ -150,93 +151,116 @@ async function generateJson( type ) {
|
||||||
let nitem = {}
|
let nitem = {}
|
||||||
|
|
||||||
if (type == 'activities_translations') {
|
if (type == 'activities_translations') {
|
||||||
nitem.id = item.activities_id?.id
|
nitem.language = LOCALE;
|
||||||
nitem.type = 'activities'
|
//nitem.id = item.activities_id?.id
|
||||||
|
//nitem.type = 'activities'
|
||||||
nitem.title = item.title
|
nitem.title = item.title
|
||||||
|
nitem.timestamp = dayjs(item.activities_id?.date).unix()
|
||||||
|
nitem.date = '';
|
||||||
|
nitem.activity = item.activities_id?.activity;
|
||||||
|
nitem.duration = item.activities_id?.duration ?? 0;
|
||||||
|
|
||||||
//nitem.body = he.decode(striptags(item.interventions[0]?.text)) || ''
|
//nitem.body = he.decode(striptags(item.interventions[0]?.text)) || ''
|
||||||
nitem.body = item.interventions[0]?.text
|
|
||||||
nitem.private = item.activities_id?.private == 1 ? true : false;
|
|
||||||
nitem.files = {}
|
|
||||||
nitem.files.youtube = item.youtube
|
|
||||||
nitem.files.video = item.privateVideo?.filename_disk
|
|
||||||
nitem.files.audio = item.mp3?.filename_disk
|
|
||||||
nitem.files.booklet = item.pdf_booklet?.filename_disk
|
|
||||||
nitem.files.simple = item.pdf?.filename_disk
|
|
||||||
nitem.slug = item.slug
|
|
||||||
nitem.place = item.activities_id?.place || null;
|
nitem.place = item.activities_id?.place || null;
|
||||||
nitem.city = item.activities_id?.city || null;
|
nitem.city = item.activities_id?.city || null;
|
||||||
nitem.state = item.activities_id?.state || null;
|
nitem.state = item.activities_id?.state || null;
|
||||||
nitem.country = item.activities_id?.country || null;
|
nitem.country = item.activities_id?.country || null;
|
||||||
|
|
||||||
nitem.duration = item.activities_id?.duration ?? 0;
|
nitem.body = (item.interventions[0]?.text?true:false);
|
||||||
|
|
||||||
nitem.year = dayjs(item.activities_id?.date).year().toString()
|
|
||||||
nitem.month = nitem.year + " > " + (dayjs(item.activities_id?.date).month() + 1).toString().padStart(2, "0")
|
|
||||||
|
|
||||||
nitem.menu_country = nitem.country
|
|
||||||
nitem.menu_state = nitem.country + " > " + nitem.state
|
|
||||||
nitem.menu_city = nitem.country + " > " + nitem.state + " > " + nitem.city
|
|
||||||
nitem.menu_place = nitem.country + " > " + nitem.state + " > " + nitem.city + " > " + nitem.place
|
|
||||||
|
|
||||||
nitem.date = dayjs(item.activities_id?.date).unix()
|
|
||||||
|
|
||||||
nitem.thumbnail = item.activities_id?.thumbnail?.filename_disk
|
nitem.thumbnail = item.activities_id?.thumbnail?.filename_disk
|
||||||
|
|
||||||
|
//nitem.files = {}
|
||||||
|
nitem.youtube = item.youtube
|
||||||
|
nitem.video = item.privateVideo?.filename_disk
|
||||||
|
nitem.audio = item.mp3?.filename_disk
|
||||||
|
nitem.booklet = item.pdf_booklet?.filename_disk
|
||||||
|
nitem.simple = item.pdf?.filename_disk
|
||||||
|
|
||||||
|
nitem.directus = item.activities_id?.id;
|
||||||
|
nitem.wp = item.activities_id?.wpid;
|
||||||
|
nitem.typesense = true;
|
||||||
|
|
||||||
|
nitem.private = item.activities_id?.private == 1 ? true : false;
|
||||||
|
nitem.slug = item.slug
|
||||||
|
|
||||||
|
//nitem.year = dayjs(item.activities_id?.date).year().toString()
|
||||||
|
//nitem.month = nitem.year + " > " + (dayjs(item.activities_id?.date).month() + 1).toString().padStart(2, "0")
|
||||||
|
|
||||||
|
// nitem.menu_country = nitem.country
|
||||||
|
// nitem.menu_state = nitem.country + " > " + nitem.state
|
||||||
|
// nitem.menu_city = nitem.country + " > " + nitem.state + " > " + nitem.city
|
||||||
|
// nitem.menu_place = nitem.country + " > " + nitem.state + " > " + nitem.city + " > " + nitem.place
|
||||||
|
|
||||||
//Filter out anything before 25/12/2021
|
//Filter out anything before 25/12/2021
|
||||||
if( nitem.date < 1640408400 ) {
|
// if( nitem.date < 1640408400 ) {
|
||||||
return false
|
// return false
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type == 'conferences_translations') {
|
if (type == 'conferences_translations') {
|
||||||
nitem.id = item.conferences_id?.id.toString()
|
nitem.language = LOCALE;
|
||||||
nitem.type = 'conferences'
|
//nitem.id = item.conferences_id?.id.toString()
|
||||||
|
//nitem.type = 'conferences'
|
||||||
nitem.title = item.title
|
nitem.title = item.title
|
||||||
//nitem.body = he.decode(striptags(item.text_published)) || ''
|
//nitem.body = he.decode(striptags(item.text_published)) || ''
|
||||||
nitem.body = item.text_published
|
//nitem.title = item.title
|
||||||
nitem.private = item.conferences_id?.public == 0 ? true : false;
|
nitem.timestamp = dayjs(item.conferences_id?.date).unix()
|
||||||
nitem.files = {}
|
nitem.date = '';
|
||||||
nitem.files.youtube = item.youtube
|
|
||||||
nitem.files.video = item.privateVideo?.filename_disk
|
nitem.activity = item.conferences_id?.activity;
|
||||||
nitem.files.audio = item.mp3?.filename_disk
|
nitem.duration = item.conferences_id?.duration ?? 0;
|
||||||
nitem.files.booklet = item.pdf_booklet?.filename_disk
|
|
||||||
nitem.files.simple = item.pdf?.filename_disk
|
//nitem.body = he.decode(striptags(item.interventions[0]?.text)) || ''
|
||||||
nitem.slug = item.slug
|
|
||||||
nitem.place = item.conferences_id?.place || null;
|
nitem.place = item.conferences_id?.place || null;
|
||||||
nitem.city = item.conferences_id?.city || null;
|
nitem.city = item.conferences_id?.city || null;
|
||||||
nitem.state = item.conferences_id?.state || null;
|
nitem.state = item.conferences_id?.state || null;
|
||||||
nitem.country = item.conferences_id?.country || null;
|
nitem.country = item.conferences_id?.country || null;
|
||||||
|
|
||||||
nitem.duration = item.conferences_id?.duration ?? 0;
|
nitem.body = (item.text_published?true:false);
|
||||||
|
|
||||||
nitem.year = dayjs(item.conferences_id?.date).year().toString()
|
|
||||||
nitem.month = nitem.year + " > " + (dayjs(item.conferences_id?.date).month() + 1).toString().padStart(2, "0")
|
|
||||||
|
|
||||||
nitem.menu_country = nitem.country
|
|
||||||
nitem.menu_state = nitem.country + " > " + nitem.state
|
|
||||||
nitem.menu_city = nitem.country + " > " + nitem.state + " > " + nitem.city
|
|
||||||
nitem.menu_place = nitem.country + " > " + nitem.state + " > " + nitem.city + " > " + nitem.place
|
|
||||||
|
|
||||||
nitem.date = dayjs(item.conferences_id?.date).unix()
|
|
||||||
|
|
||||||
nitem.thumbnail = item.conferences_id?.thumbnail?.filename_disk
|
nitem.thumbnail = item.conferences_id?.thumbnail?.filename_disk
|
||||||
}
|
|
||||||
if (nitem.title && nitem.id) {
|
//nitem.files = {}
|
||||||
return nitem;
|
nitem.youtube = item.youtube
|
||||||
|
nitem.video = item.privateVideo?.filename_disk
|
||||||
|
nitem.audio = item.mp3?.filename_disk
|
||||||
|
nitem.booklet = item.pdf_booklet?.filename_disk
|
||||||
|
nitem.simple = item.pdf?.filename_disk
|
||||||
|
|
||||||
|
nitem.directus = item.conferences_id?.id.toString()
|
||||||
|
nitem.wp = '';
|
||||||
|
nitem.typesense = true;
|
||||||
|
|
||||||
|
nitem.private = item.conferences_id?.public == 0 ? true : false;
|
||||||
|
nitem.slug = item.slug
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return nitem;
|
||||||
})
|
})
|
||||||
|
|
||||||
jsonlData += nitems.filter(item => item).map(JSON.stringify).join('\n')
|
//jsonlData += nitems.filter(item => item).map(JSON.stringify).join('\n')
|
||||||
|
|
||||||
|
//writeFile(jsonlData, type)
|
||||||
|
console.log()
|
||||||
|
if( nitems.length > 0 ){
|
||||||
|
let csv = parse(nitems);
|
||||||
|
jsonlData += csv
|
||||||
|
//let csv = nitems.map(row => fields.map(fieldName => JSON.stringify(row[fieldName], replacer)).join(','));
|
||||||
|
//return csv;
|
||||||
|
//jsonlData += nitems.filter(item => item !== false).map(JSON.stringify).join('\n')
|
||||||
|
|
||||||
|
//console.log( csv );
|
||||||
|
//console.log( documents.length + " documents to write for year " + year + " and type " + type );
|
||||||
writeFile(jsonlData, type)
|
writeFile(jsonlData, type)
|
||||||
|
}
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function writeFile(jsonlData, type) {
|
function writeFile(jsonlData, type) {
|
||||||
fs.writeFile(`./${DATA_INPUT_FOLDER}/${type}_${LOCALE.toLocaleUpperCase()}.json`, jsonlData, (err) => {
|
fs.writeFile(`./${DATA_INPUT_FOLDER}/${type}_${LOCALE.toLocaleUpperCase()}.csv`, jsonlData, (err) => {
|
||||||
if (err) {
|
if (err) {
|
||||||
console.error("Error writing file:", err);
|
console.error("Error writing file:", err);
|
||||||
return;
|
return;
|
||||||
|
|
@ -261,5 +285,5 @@ function writeFile(jsonlData, type) {
|
||||||
|
|
||||||
//for( let year = 2021; year < 2027; year++){
|
//for( let year = 2021; year < 2027; year++){
|
||||||
//generateJson( 'activities_translations' );
|
//generateJson( 'activities_translations' );
|
||||||
generateJson( 'activities_translations' );
|
generateJson( 'conferences_translations' );
|
||||||
//}
|
//}
|
||||||
|
|
@ -192,11 +192,11 @@ async function generateJson( type, year ) {
|
||||||
nitem.typesense = true;
|
nitem.typesense = true;
|
||||||
nitem.rm = item.rm;
|
nitem.rm = item.rm;
|
||||||
nitem.private = item.private == 1 ? true : false;
|
nitem.private = item.private == 1 ? true : false;
|
||||||
//nitem.draft = item.draft
|
nitem.draft = item.draft
|
||||||
//nitem.year = dayjs(item.date).year().toString()
|
nitem.year = dayjs(item.date).year().toString()
|
||||||
//nitem.month = (dayjs(item.date).month() + 1).toString().padStart(2, "0")
|
nitem.month = (dayjs(item.date).month() + 1).toString().padStart(2, "0")
|
||||||
nitem.slug = item.slug
|
nitem.slug = item.slug
|
||||||
//nitem.files = {}
|
nitem.files = {}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if (nitem.title && nitem.id) {
|
// if (nitem.title && nitem.id) {
|
||||||
|
|
@ -206,16 +206,8 @@ async function generateJson( type, year ) {
|
||||||
})
|
})
|
||||||
|
|
||||||
if( nitems.length > 0 ){
|
if( nitems.length > 0 ){
|
||||||
//let csv = parse(nitems);
|
|
||||||
// jsonlData += csv
|
|
||||||
//let csv = nitems.map(row => fields.map(fieldName => JSON.stringify(row[fieldName], replacer)).join(','));
|
|
||||||
//return csv;
|
|
||||||
jsonlData += nitems.filter(item => item !== false).map(JSON.stringify).join('\n')
|
jsonlData += nitems.filter(item => item !== false).map(JSON.stringify).join('\n')
|
||||||
//jsonlData += (nitems.filter(item => item !== false))
|
jsonlData += '\n';
|
||||||
//jsonlData += csv;
|
|
||||||
|
|
||||||
//console.log( csv );
|
|
||||||
//console.log( documents.length + " documents to write for year " + year + " and type " + type );
|
|
||||||
}
|
}
|
||||||
writeFile(jsonlData, type, year)
|
writeFile(jsonlData, type, year)
|
||||||
};
|
};
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue