diff --git a/wikipedia.js b/wikipedia.js index b888bea..6c02be8 100644 --- a/wikipedia.js +++ b/wikipedia.js @@ -58,12 +58,11 @@ async function getWikipediaData(language, topic) { }; const wikipediaHTMLPromise = function() { - const requestConfig = { - baseURL: "https://" + language + ".wikipedia.org/api/rest_v1/", - url: "/page/mobile-sections/" + encodedTopic, + baseURL: "https://" + language + ".wikipedia.org/w/rest.php/v1/page/", + url: encodedTopic + "/html", method: "get", - responseType: "json", + responseType: "text", headers: { "Api-User-Agent": process.env.WIKIDOCUMENTARIES_API_USER_AGENT }, @@ -72,65 +71,38 @@ async function getWikipediaData(language, topic) { else return axios.request(requestConfig); }; - const [wikipediaSummaryResponse, wikipediaHTMLResponse] - = await axios.all([wikipediaSummaryPromise(), wikipediaHTMLPromise()]); + const [summaryRes, htmlRes] = await Promise.allSettled([ + wikipediaSummaryPromise(), + wikipediaHTMLPromise() + ]); - if (wikipediaHTMLResponse.data == undefined ) { - // No wikipedia article - excerptHTML=""; - remainingHTML=null; - } - else { - var origHTML = wikipediaHTMLResponse.data.lead.sections[0].text; - var remainingHTML = null; - - if (wikipediaHTMLResponse.data.lead.disambiguation != undefined && wikipediaHTMLResponse.data.lead.disambiguation == true) { - wikipediaHTMLResponse.data.remaining.sections.forEach(section => { - origHTML += section.text; - }); + const wikipediaSummaryResponse = summaryRes.status === "fulfilled" ? summaryRes.value : null; + const wikipediaHTMLResponse = htmlRes.status === "fulfilled" ? htmlRes.value : null; + + let excerptHTML = ""; + let remainingHTML = null; + + if (wikipediaHTMLResponse && wikipediaHTMLResponse.data != null && typeof wikipediaHTMLResponse.data === 'string') { + let rawHTML = wikipediaHTMLResponse.data; + + const bodyMatch = rawHTML.match(/
]*>([\s\S]*?)<\/body>/i); + if (bodyMatch) { + rawHTML = bodyMatch[1]; } - else { - var remainingOrigHTML = ""; - - wikipediaHTMLResponse.data.remaining.sections.forEach(section => { - if (section.isReferenceSection == undefined) { - var sectionHeaderStartTag = ""; - var sectionHeaderEndTag = ""; - switch(section.toclevel) { - case 1: - sectionHeaderStartTag = "