// api/carpa_json_to_markdown/index_wp.js
//
// 232 lines
// 7.6 KiB
// JavaScript

//import fetch from 'node-fetch'; // For ES Modules
const fs = require('fs');
const path = require('path');
const striptags = require('striptags');
const dayjs = require('dayjs');
const he = require('he');
const jsonTojsonl = require('json-to-jsonl');
const fetch = require('node-fetch'); // For CommonJS
const { start } = require('repl');
const request = require('request');
let { render } = require("mustache");
// Module-level accumulator for the JSONL text built by generateJson(). It is
// appended to on every call and never reset, so it is shared by all requests
// issued during one run of the script.
let jsonlData = '';
// Folder read by generateMarkdown() (one sub-folder per type) and written to by writeFile().
const DATA_INPUT_FOLDER = './input';
// Root folder under which generateMarkdown() writes the Markdown files and the summary JSON.
const DATA_OUTPUT_FOLDER = './output';
// Folder holding one `<type>.md` template per type, rendered by generateMarkdown().
const TEMPLATES_FOLDER = './templates';
// Locale sent as the `locale` query parameter and, upper-cased, used in the output file name.
const LOCALE = 'es';
/**
 * Renders one Markdown file per entry of every `<year>.json` file found in
 * `${DATA_INPUT_FOLDER}/<type>` and writes a per-year / per-month summary.
 *
 * Output layout (relative to the current working directory):
 *   `${DATA_OUTPUT_FOLDER}/<type>/<year>/<month>/<date>-<activity>.md`
 *   `${DATA_OUTPUT_FOLDER}/<type>/<type>.json`   (the summary)
 *
 * The year comes from the input file name; the month is taken from characters
 * 5-6 of each entry's `date` string (assumes a `YYYY-MM-DD...` layout — TODO
 * confirm against the input files).
 *
 * @param {string} type - Sub-folder / template name, e.g. 'conferences'.
 * @returns {void}
 * @throws {Error} When the input folder, the template or an input file cannot
 *   be read or parsed, or when an output file cannot be written.
 */
function generateMarkdown(type) {
  const inputFolder = `${DATA_INPUT_FOLDER}/${type}`;
  const outputRoot = `./${DATA_OUTPUT_FOLDER}/${type}`;

  const jsonFiles = fs.readdirSync(inputFolder).filter((file) => file.endsWith('.json'));

  // The template only depends on the type, so it is read once instead of once per input file.
  const template = fs.readFileSync(`./${TEMPLATES_FOLDER}/${type}.md`).toString();

  // Create the type folder up front (including missing parents) so the summary
  // can be written even when no entry produced a year/month folder.
  fs.mkdirSync(outputRoot, { recursive: true });

  const summary = [];
  for (const file of jsonFiles) {
    const startYear = path.basename(file, '.json');
    const data = JSON.parse(fs.readFileSync(path.join(inputFolder, file), 'utf8'));

    // month ("01".."12") -> number of entries in that month
    const monthCounts = Object.create(null);

    for (const conference of data) {
      const month = conference.date.substring(5, 7);
      const monthFolder = `${outputRoot}/${startYear}/${month}`;
      fs.mkdirSync(monthFolder, { recursive: true });
      fs.writeFileSync(
        `${monthFolder}/${conference.date}-${conference.activity}.md`,
        render(template, conference),
      );
      monthCounts[month] = (monthCounts[month] ?? 0) + 1;
    }

    // Most recent month first; the subtraction coerces "03"/"11" to numbers.
    const months = Object.entries(monthCounts)
      .map(([month, count]) => ({ month, count }))
      .sort((a, b) => b.month - a.month);

    // A block-scoped object per year: the original assigned to an undeclared
    // `yearObject`, which leaks a global and throws in strict mode.
    summary.push({ year: startYear, count: data.length, months });
  }

  // Years are listed in reverse of the order in which the files were read.
  fs.writeFileSync(`${outputRoot}/${type}.json`, JSON.stringify(summary.reverse(), null, 2));
  console.log(`Summary created successfully.`);
}
/**
 * Maps one raw "activities" API item to the flat record that is serialized.
 *
 * @param {object} item - Raw item from the activities endpoint (must have an id).
 * @returns {object|false} The record, or `false` when the item is dated before the cut-off.
 */
function buildActivityRecord(item) {
  const date = dayjs(item.date);
  const record = {
    id: item.id.toString(),
    type: 'activities',
    title: item.title,
    date: date.unix(),
    activity: parseInt(item.activity, 10),
    bible_study: parseInt(item.bible_study, 10),
    place: item.place || null,
    city: item.city || null,
    state: item.state || null,
    country: item.country || null,
    duration: item.duration ?? 0,
    body: item.translations?.[0]?.interventions?.[0]?.text,
    draft: item.draft,
    private: false,
    year: date.year().toString(),
    month: (date.month() + 1).toString().padStart(2, '0'),
    thumbnail: item.thumbnail,
    // Every step is optional so an item without translations/files yields
    // undefined fields instead of throwing and aborting the whole batch.
    slug: item.translations?.[0]?.interventions?.[0]?.slug,
    files: {
      youtube: item.youtube,
      video: item.files?.videos?.file,
      audio: item.files?.audios?.[0]?.[0]?.file2,
      booklet: item.files?.textos?.[0]?.[1]?.file2,
    },
  };
  // Filter out anything before 25/12/2021 (1640408400 = 2021-12-25T05:00:00Z).
  if (record.date < 1640408400) {
    return false;
  }
  return record;
}

/**
 * Maps one raw "conferences" API item to the flat record that is serialized.
 *
 * NOTE(review): `date` is copied from `item.timestamp` while `year`/`month`
 * are derived from `item.date` — confirm the endpoint provides both fields.
 *
 * @param {object} item - Raw item from the conferences endpoint (must have an id).
 * @returns {object} The record.
 */
function buildConferenceRecord(item) {
  const date = dayjs(item.date);
  return {
    id: item.id.toString(),
    type: 'conferences',
    title: item.title,
    date: item.timestamp,
    activity: parseInt(item.activity, 10),
    city: item.city || null,
    state: item.state || null,
    country: item.country || null,
    duration: item.duration ?? 0,
    body: item.body,
    private: Number(item.private) === 1,
    year: date.year().toString(),
    month: (date.month() + 1).toString().padStart(2, '0'),
    thumbnail: item.thumbnail,
    slug: item.slug,
    files: {
      youtube: item.files?.youtube,
      video: item.files?.video,
      audio: item.files?.audio,
      booklet: item.files?.pdf,
      simple: item.files?.pdf_simple,
    },
  };
}

/**
 * Requests the items of one year from the API, converts them to flat records,
 * appends them as JSON Lines to the module-level `jsonlData` accumulator and
 * hands the accumulated text to `writeFile`.
 *
 * The work happens inside the `request` callback, so the returned promise
 * resolves as soon as the request has been issued, not when it completes.
 * Request errors are logged, never thrown. Responses that are not HTTP 200,
 * have an empty body, or whose body is not an array are ignored.
 *
 * Items without an id, without a title, or (activities only) dated before the
 * cut-off are left out. For a `type` other than 'activities' the conferences
 * endpoint is queried; records are only built for 'activities' and
 * 'conferences'.
 *
 * @param {string} type - 'activities' or 'conferences'.
 * @param {number|string} year - Year passed as the `year` query parameter.
 * @returns {Promise<void>}
 */
async function generateJson(type, year) {
  const options = { json: true };
  let url;
  if (type === 'activities') {
    url = `https://actividadeswp.carpa.com/v3/actividades/?f=texts&locale=${LOCALE}&year=${year}&limit=1000`;
  } else {
    //url = `https://conferenciaswp.carpa.com/v3/conferencias/?f=algolia&locale=${LOCALE}&year=${year}&limit=1000&cache=false`;
    url = `http://localhost:10018/v3/conferencias/?f=typesense&locale=${LOCALE}&year=${year}`;
  }

  let buildRecord = null;
  if (type === 'activities') {
    buildRecord = buildActivityRecord;
  } else if (type === 'conferences') {
    buildRecord = buildConferenceRecord;
  }

  request(url, options, (error, res, body) => {
    if (error) {
      console.log(error);
      return;
    }
    if (res.statusCode !== 200 || !body) {
      return;
    }
    if (!Array.isArray(body)) {
      // With `json: true` an error payload arrives as a plain object; iterating it would throw.
      console.log(`Unexpected response body for year ${year} and type ${type}, skipping.`);
      return;
    }
    console.log(`${body.length} items found for year ${year} and type ${type}`);

    const records = [];
    for (const item of body) {
      if (item?.id == null || item.id === '') {
        console.log("Item has no id, skipping.");
        continue;
      }
      const record = buildRecord ? buildRecord(item) : null;
      // Drops cut-off rejections (false) as well as records without a title.
      if (record && record.title && record.id) {
        records.push(record);
      }
    }

    if (records.length > 0) {
      // Terminate every batch with a newline so the next batch appended to the
      // shared accumulator starts on its own line.
      jsonlData += `${records.map((record) => JSON.stringify(record)).join('\n')}\n`;
    }
    writeFile(jsonlData, type, year);
  });
}
/**
 * Writes the accumulated JSONL text to
 * `${DATA_INPUT_FOLDER}/<type>_wp_<LOCALE upper-cased>.json`, replacing any
 * previous content.
 *
 * The write is synchronous on purpose: the file name does not include the
 * year, so every call of a run targets the same path, and overlapping
 * asynchronous writes to one file may complete out of order and leave an
 * older snapshot on disk.
 *
 * Write errors are logged, never thrown.
 *
 * @param {string} jsonlData - Full text to store in the file.
 * @param {string} type - Item type; used in the file name and the log line.
 * @param {number|string} year - Year that triggered the write; only logged.
 * @returns {Promise<void>}
 */
async function writeFile(jsonlData, type, year) {
  const targetPath = `./${DATA_INPUT_FOLDER}/${type}_wp_${LOCALE.toLocaleUpperCase()}.json`;
  try {
    fs.writeFileSync(targetPath, jsonlData);
  } catch (err) {
    console.error("Error writing file:", err);
    return;
  }
  console.log("File has been written successfully! : " + type + " / " + year);
}
//generateJson( 'conferences', 1974);
//generateJson( 'activities' , 2021 );
// setTimeout( () => {
// generateMarkdown( 'conferences' )
// }, 5000 );
//generateMarkdown( 'activities' );
//generateMarkdown( 'conferences' );
// Start one conferences export per year, from 1974 up to and including 2018.
// The calls are issued back to back; their returned promises are not awaited.
Array.from({ length: 2019 - 1974 }, (_, offset) => 1974 + offset).forEach((year) => {
  generateJson('conferences', year);
});
// for( let year = 2021; year < 2027; year++){
// generateJson( 'activities', year );
// //generateJson( 'activities_translations' );
// }