mirror of
https://github.com/hpware/news-analyze.git
synced 2025-06-23 13:04:23 +00:00
Make the news view work via the tty interface, and add some more
i18n work; also update the scraper to add images and handle unknown data. Oh, and paragraphs are now a thing :) Plus a lot of debugging.
This commit is contained in:
parent
b62a3cda3d
commit
8975812447
8 changed files with 177 additions and 201 deletions
|
@ -3,7 +3,7 @@ import sql from "~/server/components/postgres";
|
|||
import saveDataToSql from "~/server/scrape/save_scrape_data";
|
||||
|
||||
function cleanUpSlug(orgslug: string) {
|
||||
let slug = dirtySlug.trim();
|
||||
let slug = orgslug.trim();
|
||||
const validSlugRegex = /^[a-zA-Z0-9-]+$/;
|
||||
if (!validSlugRegex.test(slug)) {
|
||||
throw new Error("Invalid slug format");
|
||||
|
@ -13,16 +13,17 @@ function cleanUpSlug(orgslug: string) {
|
|||
|
||||
export default defineEventHandler(async (event) => {
|
||||
const slug = getRouterParam(event, "slug");
|
||||
const cleanSlug = await cleanUpSlug(slug);
|
||||
const result = await sql`
|
||||
const cleanSlug = cleanUpSlug(slug);
|
||||
/*const result = await sql`
|
||||
select * from articles_lt
|
||||
where slug = ${cleanSlug}
|
||||
`;
|
||||
if (result) {
|
||||
return result;
|
||||
`;*/
|
||||
if (false) {
|
||||
//return result;
|
||||
} else {
|
||||
const data = await lineToday(slug);
|
||||
saveDataToSql(data, slug);
|
||||
const data = await lineToday(cleanSlug);
|
||||
//saveDataToSql(data, slug);
|
||||
console.log(data);
|
||||
return data;
|
||||
}
|
||||
});
|
||||
|
|
|
@ -25,26 +25,48 @@ async function lineToday(slug: string) {
|
|||
.text()
|
||||
.replaceAll("\n", "")
|
||||
.replace(" ", "");
|
||||
const paragraph = html("article.news-content").text();
|
||||
const paragraph = [];
|
||||
const images = [];
|
||||
html("article.news-content")
|
||||
.contents()
|
||||
.each((i, element) => {
|
||||
if (element.type === "tag" && element.tagName === "figure") {
|
||||
const imgSrc = html(element).find("img").attr("src");
|
||||
if (imgSrc) {
|
||||
images.push(imgSrc);
|
||||
}
|
||||
} else if (element.type === "tag" && element.tagName === "p") {
|
||||
const text = html(element).text().trim();
|
||||
if (text) {
|
||||
paragraph.push(text);
|
||||
}
|
||||
}
|
||||
});
|
||||
const newsOrgdir = html("h4.entityPublishInfo-publisher")
|
||||
.text()
|
||||
.replaceAll("\n", "")
|
||||
.replaceAll(" ", "");
|
||||
const author = html("span.entityPublishInfo-meta-info")
|
||||
let author = "";
|
||||
const authorInfo = html("span.entityPublishInfo-meta-info")
|
||||
.text()
|
||||
.replace(/更新於.*發布於.*•/g, "")
|
||||
.replace(/更新.*發布.*•/g, "")
|
||||
.replaceAll("\n", "")
|
||||
.replaceAll(" ", "");
|
||||
|
||||
if (/更新.*發布.*/.test(authorInfo)) {
|
||||
author = "未知";
|
||||
} else {
|
||||
author = authorInfo;
|
||||
}
|
||||
return {
|
||||
title: title,
|
||||
paragraph: paragraph,
|
||||
origin: newsOrgdir,
|
||||
author: author,
|
||||
images: images,
|
||||
};
|
||||
}
|
||||
|
||||
// Testing on console only!
|
||||
//console.log(await lineToday("kEJjxKw"));
|
||||
//console.log(await lineToday("wJyR8Nw"));
|
||||
|
||||
export default lineToday;
|
||||
|
|
|
@ -2,7 +2,13 @@ import postgres from "~/server/components/postgres";
|
|||
import { v4 as uuidv4 } from "uuid";
|
||||
|
||||
async function saveDataToSql(
|
||||
data: { title: string; paragraph: string; author: string; origin: string },
|
||||
data: {
|
||||
title: string;
|
||||
paragraph: any;
|
||||
author: string;
|
||||
origin: string;
|
||||
image: any;
|
||||
},
|
||||
slug: string,
|
||||
) {
|
||||
const sql = postgres;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue