From e83122945a39eb08404976d25e04823eb73981cf Mon Sep 17 00:00:00 2001 From: Julio Ruiz Date: Mon, 18 May 2026 13:16:54 -0500 Subject: [PATCH] Updated batch processes --- carpa_json_to_markdown/document_update.mjs | 24 ++-- carpa_json_to_markdown/index_directus.js | 136 ++++++++++++--------- carpa_json_to_markdown/index_wp.js | 22 ++-- 3 files changed, 101 insertions(+), 81 deletions(-) diff --git a/carpa_json_to_markdown/document_update.mjs b/carpa_json_to_markdown/document_update.mjs index af932fe..fdcf6c9 100644 --- a/carpa_json_to_markdown/document_update.mjs +++ b/carpa_json_to_markdown/document_update.mjs @@ -88,7 +88,9 @@ async function createParagraphs(documentId, paragraphs, locale, type) { //Format paragraph here. Check paragraphs and create a notag version of the text for better search results. We can also add a "raw" version of the text with all tags for display purposes. let fixedHtml = para.trim(); - if(!fixedHtml.startsWith(', wrap it in

tags fixedHtml = `

${fixedHtml}

`; } @@ -115,7 +117,7 @@ async function createParagraphs(documentId, paragraphs, locale, type) { // Main function to create document in Typesense async function createDocument(data){ let document = { - code: data.locale + '-' + dayjs.unix(data.timestamp).format('YYYYMMDD') + '-' + data.activity, + code: dayjs.unix(data.timestamp).format('YYYYMMDD') + '-' + data.activity, locale: data.locale, id: data.id.toString(), type: data.type, @@ -133,17 +135,19 @@ async function createDocument(data){ thumbnail: data.thumbnail, files: { youtube: data.youtube, - video: data.files?.videos?.file || null, - audio: data.files?.audios?.[0]?.[0]?.file2 || null, - booklet: data.files?.textos?.[0]?.[1]?.file2 || null, - simple: data.files?.textos?.[0]?.[0]?.file2 || null, + video: data.video || null, + audio: data.audio || null, + booklet: data.booklet || null, + simple: data.simple || null, }, directus: "", wp: data.id.toString(), - //rm: data.rm, - private: false, - slug: data.translations?.[0]?.interventions?.[0]?.slug || null, - body: data.body || null + rm: data.rm, + private: data.private || false, + slug: data.slug || null, + body: "", + year: data.year, + month: data.month, }; let doc = await client.collections('documents').documents().upsert(document); diff --git a/carpa_json_to_markdown/index_directus.js b/carpa_json_to_markdown/index_directus.js index 245fbaa..3183d14 100644 --- a/carpa_json_to_markdown/index_directus.js +++ b/carpa_json_to_markdown/index_directus.js @@ -2,7 +2,7 @@ //import fetch from 'node-fetch'; // For ES Modules const fs = require('fs'); const path = require('path'); - +const { parse } = require('json2csv'); const striptags = require('striptags'); const dayjs = require('dayjs'); const he = require('he'); @@ -18,7 +18,7 @@ let jsonlData = ''; const DATA_INPUT_FOLDER = './input'; const DATA_OUTPUT_FOLDER = './output'; const TEMPLATES_FOLDER = './templates'; -const LOCALE = 'pt'; +const LOCALE = 'es'; function generateMarkdown(type) { const folderPath = `${DATA_INPUT_FOLDER}/${type}`; @@ -95,6 +95,7 @@ async function generateJson( type ) { let fields = []; fields['activities_translations'] = [ 'activities_id.id', + 'activities_id.wpid', 'title', 'activities_id.date', 'activities_id.activity', @@ -150,93 +151,116 @@ async function generateJson( type ) { let nitem = {} if (type == 'activities_translations') { - nitem.id = item.activities_id?.id - nitem.type = 'activities' + nitem.language = LOCALE; + //nitem.id = item.activities_id?.id + //nitem.type = 'activities' nitem.title = item.title + nitem.timestamp = dayjs(item.activities_id?.date).unix() + nitem.date = ''; + nitem.activity = item.activities_id?.activity; + nitem.duration = item.activities_id?.duration ?? 0; + //nitem.body = he.decode(striptags(item.interventions[0]?.text)) || '' - nitem.body = item.interventions[0]?.text - nitem.private = item.activities_id?.private == 1 ? true : false; - nitem.files = {} - nitem.files.youtube = item.youtube - nitem.files.video = item.privateVideo?.filename_disk - nitem.files.audio = item.mp3?.filename_disk - nitem.files.booklet = item.pdf_booklet?.filename_disk - nitem.files.simple = item.pdf?.filename_disk - nitem.slug = item.slug nitem.place = item.activities_id?.place || null; nitem.city = item.activities_id?.city || null; nitem.state = item.activities_id?.state || null; nitem.country = item.activities_id?.country || null; - nitem.duration = item.activities_id?.duration ?? 0; - - nitem.year = dayjs(item.activities_id?.date).year().toString() - nitem.month = nitem.year + " > " + (dayjs(item.activities_id?.date).month() + 1).toString().padStart(2, "0") - - nitem.menu_country = nitem.country - nitem.menu_state = nitem.country + " > " + nitem.state - nitem.menu_city = nitem.country + " > " + nitem.state + " > " + nitem.city - nitem.menu_place = nitem.country + " > " + nitem.state + " > " + nitem.city + " > " + nitem.place - - nitem.date = dayjs(item.activities_id?.date).unix() + nitem.body = (item.interventions[0]?.text?true:false); nitem.thumbnail = item.activities_id?.thumbnail?.filename_disk + //nitem.files = {} + nitem.youtube = item.youtube + nitem.video = item.privateVideo?.filename_disk + nitem.audio = item.mp3?.filename_disk + nitem.booklet = item.pdf_booklet?.filename_disk + nitem.simple = item.pdf?.filename_disk + + nitem.directus = item.activities_id?.id; + nitem.wp = item.activities_id?.wpid; + nitem.typesense = true; + + nitem.private = item.activities_id?.private == 1 ? true : false; + nitem.slug = item.slug + + //nitem.year = dayjs(item.activities_id?.date).year().toString() + //nitem.month = nitem.year + " > " + (dayjs(item.activities_id?.date).month() + 1).toString().padStart(2, "0") + + // nitem.menu_country = nitem.country + // nitem.menu_state = nitem.country + " > " + nitem.state + // nitem.menu_city = nitem.country + " > " + nitem.state + " > " + nitem.city + // nitem.menu_place = nitem.country + " > " + nitem.state + " > " + nitem.city + " > " + nitem.place + //Filter out anything before 25/12/2021 - if( nitem.date < 1640408400 ) { - return false - } + // if( nitem.date < 1640408400 ) { + // return false + // } } if (type == 'conferences_translations') { - nitem.id = item.conferences_id?.id.toString() - nitem.type = 'conferences' + nitem.language = LOCALE; + //nitem.id = item.conferences_id?.id.toString() + //nitem.type = 'conferences' nitem.title = item.title //nitem.body = he.decode(striptags(item.text_published)) || '' - nitem.body = item.text_published - nitem.private = item.conferences_id?.public == 0 ? true : false; - nitem.files = {} - nitem.files.youtube = item.youtube - nitem.files.video = item.privateVideo?.filename_disk - nitem.files.audio = item.mp3?.filename_disk - nitem.files.booklet = item.pdf_booklet?.filename_disk - nitem.files.simple = item.pdf?.filename_disk - nitem.slug = item.slug + //nitem.title = item.title + nitem.timestamp = dayjs(item.conferences_id?.date).unix() + nitem.date = ''; + + nitem.activity = item.conferences_id?.activity; + nitem.duration = item.conferences_id?.duration ?? 0; + + //nitem.body = he.decode(striptags(item.interventions[0]?.text)) || '' nitem.place = item.conferences_id?.place || null; nitem.city = item.conferences_id?.city || null; nitem.state = item.conferences_id?.state || null; nitem.country = item.conferences_id?.country || null; - nitem.duration = item.conferences_id?.duration ?? 0; - - nitem.year = dayjs(item.conferences_id?.date).year().toString() - nitem.month = nitem.year + " > " + (dayjs(item.conferences_id?.date).month() + 1).toString().padStart(2, "0") - - nitem.menu_country = nitem.country - nitem.menu_state = nitem.country + " > " + nitem.state - nitem.menu_city = nitem.country + " > " + nitem.state + " > " + nitem.city - nitem.menu_place = nitem.country + " > " + nitem.state + " > " + nitem.city + " > " + nitem.place - - nitem.date = dayjs(item.conferences_id?.date).unix() + nitem.body = (item.text_published?true:false); nitem.thumbnail = item.conferences_id?.thumbnail?.filename_disk - } - if (nitem.title && nitem.id) { - return nitem; + + //nitem.files = {} + nitem.youtube = item.youtube + nitem.video = item.privateVideo?.filename_disk + nitem.audio = item.mp3?.filename_disk + nitem.booklet = item.pdf_booklet?.filename_disk + nitem.simple = item.pdf?.filename_disk + + nitem.directus = item.conferences_id?.id.toString() + nitem.wp = ''; + nitem.typesense = true; + + nitem.private = item.conferences_id?.public == 0 ? true : false; + nitem.slug = item.slug } + return nitem; }) - jsonlData += nitems.filter(item => item).map(JSON.stringify).join('\n') + //jsonlData += nitems.filter(item => item).map(JSON.stringify).join('\n') - writeFile(jsonlData, type) + //writeFile(jsonlData, type) + console.log() + if( nitems.length > 0 ){ + let csv = parse(nitems); + jsonlData += csv + //let csv = nitems.map(row => fields.map(fieldName => JSON.stringify(row[fieldName], replacer)).join(',')); + //return csv; + //jsonlData += nitems.filter(item => item !== false).map(JSON.stringify).join('\n') + + //console.log( csv ); + //console.log( documents.length + " documents to write for year " + year + " and type " + type ); + writeFile(jsonlData, type) + } }; }); } function writeFile(jsonlData, type) { - fs.writeFile(`./${DATA_INPUT_FOLDER}/${type}_${LOCALE.toLocaleUpperCase()}.json`, jsonlData, (err) => { + fs.writeFile(`./${DATA_INPUT_FOLDER}/${type}_${LOCALE.toLocaleUpperCase()}.csv`, jsonlData, (err) => { if (err) { console.error("Error writing file:", err); return; @@ -261,5 +285,5 @@ function writeFile(jsonlData, type) { //for( let year = 2021; year < 2027; year++){ //generateJson( 'activities_translations' ); -generateJson( 'activities_translations' ); +generateJson( 'conferences_translations' ); //} \ No newline at end of file diff --git a/carpa_json_to_markdown/index_wp.js b/carpa_json_to_markdown/index_wp.js index aa08fca..9a9b221 100644 --- a/carpa_json_to_markdown/index_wp.js +++ b/carpa_json_to_markdown/index_wp.js @@ -192,11 +192,11 @@ async function generateJson( type, year ) { nitem.typesense = true; nitem.rm = item.rm; nitem.private = item.private == 1 ? true : false; - //nitem.draft = item.draft - //nitem.year = dayjs(item.date).year().toString() - //nitem.month = (dayjs(item.date).month() + 1).toString().padStart(2, "0") + nitem.draft = item.draft + nitem.year = dayjs(item.date).year().toString() + nitem.month = (dayjs(item.date).month() + 1).toString().padStart(2, "0") nitem.slug = item.slug - //nitem.files = {} + nitem.files = {} } // if (nitem.title && nitem.id) { @@ -206,18 +206,10 @@ async function generateJson( type, year ) { }) if( nitems.length > 0 ){ - //let csv = parse(nitems); - // jsonlData += csv - //let csv = nitems.map(row => fields.map(fieldName => JSON.stringify(row[fieldName], replacer)).join(',')); - //return csv; - jsonlData += nitems.filter(item => item !== false).map(JSON.stringify).join('\n') - //jsonlData += (nitems.filter(item => item !== false)) - //jsonlData += csv; - - //console.log( csv ); - //console.log( documents.length + " documents to write for year " + year + " and type " + type ); + jsonlData += nitems.filter(item => item !== false).map(JSON.stringify).join('\n') + jsonlData += '\n'; } - writeFile(jsonlData, type, year) + writeFile(jsonlData, type, year) }; } });