mirror of
https://github.com/hpware/news-analyze.git
synced 2025-06-23 13:04:23 +00:00
feat: implement DraggableWindow component and update index.vue layout; enhance scraping scripts for better error handling and content extraction
This commit is contained in:
parent
8b07d4b3be
commit
b461e81360
8 changed files with 522 additions and 174 deletions
87
components/DraggableWindow.vue
Normal file
87
components/DraggableWindow.vue
Normal file
|
@ -0,0 +1,87 @@
|
|||
<script setup lang="ts">
/**
 * DraggableWindow: a fixed-position floating panel whose title bar can be
 * dragged with the mouse. Window content is supplied through the default slot.
 * Emits `close` when either title-bar button is clicked.
 */
import { ref, onMounted, onUnmounted } from 'vue'

const props = defineProps<{
  /** Text rendered in the title bar. */
  title: string
  /** Initial left position in pixels; 100 when omitted. */
  initialX?: number
  /** Initial top position in pixels; 100 when omitted. */
  initialY?: number
  /** CSS width of the window; '400px' when omitted. */
  width?: string
  /** CSS height of the window; '300px' when omitted. */
  height?: string
}>()

const emit = defineEmits(['close'])

// True between mousedown on the title bar and the next mouseup anywhere.
const isDragging = ref(false)

// Top-left corner of the window in viewport pixels.
// `??` (not `||`) so that an explicit initialX/initialY of 0 is honoured
// instead of being replaced by the default.
const position = ref({
  x: props.initialX ?? 100,
  y: props.initialY ?? 100
})

// Distance from the window's top-left corner to the pointer at drag start,
// so the window does not jump under the cursor while moving.
const offset = ref({ x: 0, y: 0 })

/** Begin a drag: remember where inside the window the pointer grabbed it. */
const startDrag = (e: MouseEvent) => {
  isDragging.value = true
  offset.value = {
    x: e.clientX - position.value.x,
    y: e.clientY - position.value.y
  }
}

/** While dragging, keep the grabbed point under the pointer. */
const doDrag = (e: MouseEvent) => {
  if (!isDragging.value) {
    return
  }
  position.value = {
    x: e.clientX - offset.value.x,
    y: e.clientY - offset.value.y
  }
}

/** End the current drag (no-op when not dragging). */
const stopDrag = () => {
  isDragging.value = false
}

// Listen on the document rather than the title bar so the drag keeps
// tracking when the pointer moves faster than the window.
onMounted(() => {
  document.addEventListener('mousemove', doDrag)
  document.addEventListener('mouseup', stopDrag)
})

onUnmounted(() => {
  document.removeEventListener('mousemove', doDrag)
  document.removeEventListener('mouseup', stopDrag)
})
</script>

<template>
  <div
    :style="{
      left: `${position.x}px`,
      top: `${position.y}px`,
      width: props.width || '400px',
      height: props.height || '300px'
    }"
    class="fixed bg-white dark:bg-gray-800 rounded-lg shadow-lg overflow-hidden"
  >
    <!-- Title bar: pressing the mouse here starts a drag. -->
    <div
      @mousedown="startDrag"
      class="bg-gray-700 p-2 cursor-move flex justify-between items-center"
    >
      <h3 class="font-semibold">{{ title }}</h3>
      <div class="flex flex-row gap-1">
        <!-- NOTE(review): this looks like a minimise button but emits `close`,
             same as the button below. Confirm whether a separate event is intended. -->
        <button
          @click="emit('close')"
          class="p-1 hover:bg-gray-300 dark:hover:bg-gray-600 rounded"
        >
          ━
        </button>
        <button
          @click="emit('close')"
          class="p-1 rounded bg-red-500 text-white hover:bg-red-600 transition duration-200"
        >
          ✕
        </button>
      </div>
    </div>
    <!-- Window body: caller-provided content. -->
    <div class="p-4 text-black">
      <slot></slot>
    </div>
  </div>
</template>
|
178
layouts/macui.vue
Normal file
178
layouts/macui.vue
Normal file
|
@ -0,0 +1,178 @@
|
|||
|
||||
<script setup lang="ts">
|
||||
// No layout
|
||||
|
||||
// interfaces
|
||||
interface currentNavBarInterface {
|
||||
name: string;
|
||||
icon: string;
|
||||
action: any;
|
||||
flash: boolean;
|
||||
windowAssociated: string;
|
||||
}
|
||||
|
||||
// Import plugins
|
||||
import { gsap } from "gsap";
|
||||
import { TextPlugin } from "gsap/TextPlugin";
|
||||
import { createApp } from "vue";
|
||||
gsap.registerPlugin(TextPlugin);
|
||||
|
||||
// Import Windows
|
||||
import SignIn from "~/components/app/windows/login.vue";
|
||||
|
||||
// Import Shadcn/UI components
|
||||
import AlertComponent from "~/components/ui/alert/Alert.vue";
|
||||
import ButtonComponent from "~/components/ui/button/Button.vue";
|
||||
import DialogComponent from "~/components/ui/dialog/Dialog.vue";
|
||||
import ProgressComponent from "~/components/ui/progress/Progress.vue";
|
||||
import HoverCardComponent from "~/components/ui/hover-card/HoverCard.vue";
|
||||
|
||||
// Icons
|
||||
import { ComputerDesktopIcon, UserIcon, LanguageIcon, ChevronRightIcon } from "@heroicons/vue/24/outline";
|
||||
|
||||
// i18n
|
||||
const { t, locale, locales } = useI18n();
|
||||
const switchLocalePath = useSwitchLocalePath();
|
||||
const localePath = useLocalePath();
|
||||
|
||||
// Router
|
||||
const router = useRouter();
|
||||
const route = useRoute();
|
||||
|
||||
// values
|
||||
const popMessage = ref(null);
|
||||
const menuOpen = ref(false);
|
||||
const langMenuOpen = ref(false);
|
||||
const lang = ref(locale.value);
|
||||
const alertOpen = ref(false);
|
||||
const currentNavBar = ref<currentNavBarInterface[]>([]);
|
||||
|
||||
// Date
|
||||
/**
 * Format the current moment for the menu-bar clock: zh-TW locale, 2-digit
 * month/day/hour/minute/second, numeric year, 24-hour time.
 */
const formatNow = () =>
  new Date().toLocaleDateString("zh-TW", {
    month: "2-digit",
    day: "2-digit",
    year: "numeric",
    hour: "2-digit",
    minute: "2-digit",
    second: "2-digit",
    hour12: false,
  });

// Text shown in the top bar; refreshed once per second while mounted.
const currentDate = ref(formatNow());

// Handle of the refresh timer, kept so it can be cancelled on unmount.
let clockTimer: ReturnType<typeof setInterval> | undefined;

onMounted(() => {
  clockTimer = setInterval(() => {
    currentDate.value = formatNow();
  }, 1000);
});

// Stop the timer when the layout goes away; otherwise the interval keeps
// firing (and keeps this component's state alive) after navigation.
// `onUnmounted` comes from the same auto-imports that provide `ref`/`onMounted` here.
onUnmounted(() => {
  if (clockTimer !== undefined) {
    clearInterval(clockTimer);
    clockTimer = undefined;
  }
});
|
||||
|
||||
// functions
|
||||
/**
 * Handle a request to open a window from the menu or footer.
 *
 * The special name "leave" navigates to the localized "/home" route.
 * Every request is logged to the console and closes the main menu.
 *
 * @param windowName identifier of the requested window, if any
 */
const openWindow = (windowName?: string) => {
  const wantsToLeave = windowName === "leave";
  if (wantsToLeave) {
    const homePath = localePath("/home");
    router.push(homePath);
  }

  console.log(windowName);
  menuOpen.value = false;
}
|
||||
|
||||
const unMinWindow = (windowName?: string) => {
|
||||
console.log(windowName);
|
||||
}
|
||||
|
||||
// menus
|
||||
const menuItems = [
|
||||
{ name: "Hot News", windowName: "hotnews"} ,
|
||||
{ name: "News", windowName: "news"},
|
||||
{ name: "Sources", windowName: "sources"},
|
||||
{ name: 'About This Website', windowName: "about"},
|
||||
{ name: 'Settings', windowName: "settings"},
|
||||
{ name: 'Leave', windowName: "leave"},
|
||||
]
|
||||
const toggleMenu = () => {
|
||||
menuOpen.value = !menuOpen.value
|
||||
}
|
||||
// Lang Menu
|
||||
const toggleLangMenu = () => {
|
||||
langMenuOpen.value = !langMenuOpen.value
|
||||
}
|
||||
</script>
|
||||
<template>
|
||||
<div
|
||||
class="absolute inset-x-0 flex flex-row px-2 py-1 bg-[#7D7C7C]/70 text-white justify-between align-center text-center z-50"
|
||||
>
|
||||
<!--Menu container-->
|
||||
<div class="flex flex-row g-2 text-gray-400 text-white z-999">
|
||||
<button @click="toggleMenu" class="w-8 h-8 text-white hover:text-blue-500 transition-all duration-100 flex flex-row">
|
||||
<ComputerDesktopIcon/>
|
||||
</button>
|
||||
<span class="ml-1 mr-2 text-[20px]">|</span>
|
||||
<!--navbar icons for min and max application window-->
|
||||
<button class="flex flex-row items-center gap-x-2 text-gray-400 hover:text-gray-600 transition-all duration-100">
|
||||
</button>
|
||||
<div v-for="item in currentNavBar" :key="item.name" class="flex flex-row items-center gap-x-2 hover:bg-gray-100 transition-all duration-100 px-4 py-2 cursor-pointer">
|
||||
<button @click="unMinWindow(item.windowAssociated)" class="flex flex-row items-center gap-x-2 text-gray-400 hover:text-gray-600 transition-all duration-100">
|
||||
<span>{{ item.name }}</span>
|
||||
<span v-if="item.flash" class="animate-ping absolute inline-flex h-3 w-3 rounded-full bg-red-400 opacity-75"></span>
|
||||
<span v-if="item.icon" :class="item.icon">
|
||||
</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="text-center align-middle justify-center text-white">{{ currentDate }}</div>
|
||||
</div>
|
||||
<div class="w-full h-[2.5em]"></div>
|
||||
<!--Menu-->
|
||||
<Transition
|
||||
enter-active-class="animate__animated animate__fadeInDown animate_fast03"
|
||||
leave-active-class="animate__animated animate__fadeOutUp animate_fast03"
|
||||
>
|
||||
<div class="m-2 p-2 bg-gray-800 shadow-lg w-fit rounded-[10px] v-998" v-if="menuOpen">
|
||||
<div v-for="item in menuItems" :key="item.name" class="">
|
||||
<button @click="openWindow(item.windowName)" class="flex flex-row items-center gap-x-2 text-gray-400 hover:text-gray-600 transition-all duration-100">
|
||||
<span>{{ item.name }}</span>
|
||||
<ChevronRightIcon class="w-4 h-4 justify-center align-center" />
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</Transition>
|
||||
<!--Main desktop contents-->
|
||||
<div
|
||||
class="flex flex-col justify-center align-center text-center absolute w-full h-screen inset-x-0 inset-y-0 z-[-1]"
|
||||
id="desktop"
|
||||
>
|
||||
</div>
|
||||
<slot/>
|
||||
<!--Footer-->
|
||||
<div
|
||||
class="absolute w-[calc(100% - 5px)] inset-x-0 bottom-0 mx-[1.5px] p-3 justify-between align-center flex flex-row"
|
||||
>
|
||||
<div class="">
|
||||
<!--Lang-->
|
||||
<span>Lang: </span>
|
||||
<span class="text-lg">{{ t("localeflag") }}</span>
|
||||
<button class="w-4 h-4 hover:text-blue-200 transition-all duration-100" @click="toggleLangMenu">
|
||||
<LanguageIcon />
|
||||
</button>
|
||||
</div>
|
||||
<div class="gap-2 flex flex-row">
|
||||
<!--版權資訊-->
|
||||
<span class="text-sm">1.0.0</span>
|
||||
<span class="text-sm">|</span>
|
||||
<span class="text-sm">MIT License</span>
|
||||
<span class="text-sm">|</span>
|
||||
<span class="text-sm">{{ new Date().getFullYear() }} © yh</span>
|
||||
</div>
|
||||
<div class="">
|
||||
<button @click="openWindow('login')" class="w-8 h-8 text-gray-400 flex flex-row">
|
||||
<UserIcon class="w-8 h-8 text-gray-400 hover:text-blue-500 transition-all duration-100" />
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
|
@ -1,42 +1,8 @@
|
|||
<script setup lang="ts">
|
||||
// No layout
|
||||
import DraggableWindow from "~/components/DraggableWindow.vue";
|
||||
definePageMeta({
|
||||
layout: false,
|
||||
layout: "macui",
|
||||
});
|
||||
|
||||
// interfaces
|
||||
interface currentNavBarInterface {
|
||||
name: string;
|
||||
icon: string;
|
||||
action: any;
|
||||
flash: boolean;
|
||||
windowAssociated: string;
|
||||
}
|
||||
|
||||
// Import plugins
|
||||
import { gsap } from "gsap";
|
||||
import { TextPlugin } from "gsap/TextPlugin";
|
||||
import { createApp } from "vue";
|
||||
gsap.registerPlugin(TextPlugin);
|
||||
|
||||
// Import Windows
|
||||
import SignIn from "~/components/app/windows/login.vue";
|
||||
|
||||
// Import Shadcn/UI components
|
||||
import AlertComponent from "~/components/ui/alert/Alert.vue";
|
||||
import ButtonComponent from "~/components/ui/button/Button.vue";
|
||||
import DialogComponent from "~/components/ui/dialog/Dialog.vue";
|
||||
import ProgressComponent from "~/components/ui/progress/Progress.vue";
|
||||
import HoverCardComponent from "~/components/ui/hover-card/HoverCard.vue";
|
||||
|
||||
// Icons
|
||||
import { ComputerDesktopIcon, UserIcon, LanguageIcon, ChevronRightIcon } from "@heroicons/vue/24/outline";
|
||||
|
||||
// i18n
|
||||
const { t, locale, locales } = useI18n();
|
||||
const switchLocalePath = useSwitchLocalePath();
|
||||
const localePath = useLocalePath();
|
||||
|
||||
// Router
|
||||
const router = useRouter();
|
||||
const route = useRoute();
|
||||
|
@ -55,139 +21,7 @@ watch(() => route.query.openapp, (newVal) => {
|
|||
});
|
||||
}
|
||||
});
|
||||
|
||||
// values
|
||||
const popMessage = ref(null);
|
||||
const menuOpen = ref(false);
|
||||
const langMenuOpen = ref(false);
|
||||
const lang = ref(locale.value);
|
||||
const alertOpen = ref(false);
|
||||
const currentNavBar = ref<currentNavBarInterface[]>([]);
|
||||
|
||||
// Date
|
||||
const currentDate = ref(
|
||||
new Date().toLocaleDateString("zh-TW", {
|
||||
month: "2-digit",
|
||||
day: "2-digit",
|
||||
year: "numeric",
|
||||
hour: "2-digit",
|
||||
minute: "2-digit",
|
||||
second: "2-digit",
|
||||
hour12: false,
|
||||
}),
|
||||
);
|
||||
onMounted(() => {
|
||||
setInterval(() => {
|
||||
currentDate.value = new Date().toLocaleDateString("zh-TW", {
|
||||
month: "2-digit",
|
||||
day: "2-digit",
|
||||
year: "numeric",
|
||||
hour: "2-digit",
|
||||
minute: "2-digit",
|
||||
second: "2-digit",
|
||||
hour12: false,
|
||||
});
|
||||
}, 1000);
|
||||
});
|
||||
|
||||
// functions
|
||||
const openWindow = (windowName?: string) => {
|
||||
if (windowName === "leave") {
|
||||
router.push(localePath("/home"));
|
||||
}
|
||||
console.log(windowName);
|
||||
menuOpen.value = false;
|
||||
}
|
||||
|
||||
const unMinWindow = (windowName?: string) => {
|
||||
|
||||
}
|
||||
|
||||
// menus
|
||||
const menuItems = [
|
||||
{ name: "Hot News", windowName: "hotnews"} ,
|
||||
{ name: "News", windowName: "news"},
|
||||
{ name: "Sources", windowName: "sources"},
|
||||
{ name: 'About This Website', windowName: "about"},
|
||||
{ name: 'Settings', windowName: "settings"},
|
||||
{ name: 'Leave', windowName: "leave"},
|
||||
]
|
||||
const toggleMenu = () => {
|
||||
menuOpen.value = !menuOpen.value
|
||||
}
|
||||
// Lang Menu
|
||||
const toggleLangMenu = () => {
|
||||
langMenuOpen.value = !langMenuOpen.value
|
||||
}
|
||||
</script>
|
||||
<template>
|
||||
<div
|
||||
class="absolute inset-x-0 flex flex-row px-2 py-1 bg-[#7D7C7C]/70 text-white justify-between align-center text-center z-50"
|
||||
>
|
||||
<!--Menu container-->
|
||||
<div class="flex flex-row g-2 text-gray-400 text-white ">
|
||||
<button @click="toggleMenu" class="w-8 h-8 text-white hover:text-blue-500 transition-all duration-100 flex flex-row">
|
||||
<ComputerDesktopIcon/>
|
||||
</button>
|
||||
<span class="ml-1 mr-2 text-[20px]">|</span>
|
||||
<!--navbar icons for min and max application window-->
|
||||
<div v-for="item in currentNavBar" :key="item.name" class="flex flex-row items-center gap-x-2 hover:bg-gray-100 transition-all duration-100 px-4 py-2 cursor-pointer">
|
||||
<button @click="unMinWindow(item.windowAssociated)" class="flex flex-row items-center gap-x-2 text-gray-400 hover:text-gray-600 transition-all duration-100">
|
||||
<span>{{ item.name }}</span>
|
||||
<span v-if="item.flash" class="animate-ping absolute inline-flex h-3 w-3 rounded-full bg-red-400 opacity-75"></span>
|
||||
<span v-if="item.icon" :class="item.icon">
|
||||
</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="text-center align-middle justify-center text-white">{{ currentDate }}</div>
|
||||
</div>
|
||||
<div class="w-full h-[2.5em]"></div>
|
||||
<!--Menu-->
|
||||
<Transition
|
||||
enter-active-class="animate__animated animate__fadeInDown animate_fast03"
|
||||
leave-active-class="animate__animated animate__fadeOutUp animate_fast03"
|
||||
>
|
||||
<div class="m-2 p-2 bg-gray-800 shadow-lg w-fit rounded-[10px]" v-if="menuOpen">
|
||||
<div v-for="item in menuItems" :key="item.name" class="">
|
||||
<button @click="openWindow(item.windowName)" class="flex flex-row items-center gap-x-2 text-gray-400 hover:text-gray-600 transition-all duration-100">
|
||||
<span>{{ item.name }}</span>
|
||||
<ChevronRightIcon class="w-4 h-4 justify-center align-center" />
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</Transition>
|
||||
<!--Main desktop contents-->
|
||||
<div
|
||||
class="flex flex-col justify-center align-center text-center absolute w-full h-screen inset-x-0 inset-y-0 z-[-1]"
|
||||
id="desktop"
|
||||
>
|
||||
|
||||
</div>
|
||||
<!--Footer-->
|
||||
<div
|
||||
class="absolute w-[calc(100% - 5px)] inset-x-0 bottom-0 mx-[1.5px] p-3 justify-between align-center flex flex-row"
|
||||
>
|
||||
<div class="">
|
||||
<!--Lang-->
|
||||
<span>Lang: </span>
|
||||
<span class="text-lg">{{ t("localeflag") }}</span>
|
||||
<button class="w-4 h-4 hover:text-blue-200 transition-all duration-100" @click="toggleLangMenu">
|
||||
<LanguageIcon />
|
||||
</button>
|
||||
</div>
|
||||
<div class="gap-2 flex flex-row">
|
||||
<!--版權資訊-->
|
||||
<span class="text-sm">1.0.0</span>
|
||||
<span class="text-sm">|</span>
|
||||
<span class="text-sm">MIT License</span>
|
||||
<span class="text-sm">|</span>
|
||||
<span class="text-sm">{{ new Date().getFullYear() }} © yh</span>
|
||||
</div>
|
||||
<div class="">
|
||||
<button @click="openWindow('login')" class="w-8 h-8 text-gray-400 flex flex-row">
|
||||
<UserIcon class="w-8 h-8 text-gray-400 hover:text-blue-500 transition-all duration-100" />
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<DraggableWindow title="Title">Hi This is a window</DraggableWindow>
|
||||
</template>
|
|
@ -1,7 +1,25 @@
|
|||
# Status
|
||||
|
||||
## cna.py
|
||||
Not working
|
||||
|
||||
## setn.py
|
||||
Working
|
||||
|
||||
## tvbs.py
|
||||
Working
|
||||
|
||||
## taisounds.py
|
||||
Working
|
||||
|
||||
## cna.py
|
||||
Broken
|
||||
|
||||
Error: `Error: 'utf-8' codec can't decode byte 0x83 in position 0: invalid start byte`
|
||||
|
||||
## chinatimes.py
|
||||
Broken
|
||||
|
||||
Error: `Error: 'utf-8' codec can't decode byte 0xa3 in position 0: invalid start byte`
|
||||
|
||||
## twreporter.py
|
||||
Broken
|
||||
|
||||
Error: `Error: 'utf-8' codec can't decode byte 0xc0 in position 2: invalid start byte`
|
56
scraping/findText/chinatimes.py
Normal file
56
scraping/findText/chinatimes.py
Normal file
|
@ -0,0 +1,56 @@
|
|||
import re
|
||||
from urllib.request import urlopen, Request
|
||||
import chardet
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
import psycopg2
|
||||
import pandas as pd
|
||||
import dotenv
|
||||
import os
|
||||
import gzip
|
||||
import io
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# Request headers that imitate a desktop browser.
headers = {
    #'User-Agent': 'NewsSceraperBot/1.0 (https://github.com/hpware/news-analyze)',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': '*',
    'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    # Advertise only the encoding this script can decode. Offering
    # "deflate"/"br" lets the server send a body that would be passed
    # undecompressed to .decode(), which fails with
    # "'utf-8' codec can't decode byte ...".
    'Accept-Encoding': 'gzip',
    'Connection': 'keep-alive',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Cache-Control': 'max-age=0',
}

url = "https://www.chinatimes.com/realtimenews/20250511002798-260407?chdtv"

try:
    req = Request(url, headers=headers)
    # Context manager closes the connection; timeout avoids hanging forever.
    with urlopen(req, timeout=30) as response:
        raw = response.read()
        content_encoding = (response.headers.get('Content-Encoding') or '').lower()
        # Prefer the charset declared by the server, falling back to UTF-8.
        charset = response.headers.get_content_charset() or 'utf-8'

    if content_encoding == 'gzip':
        raw = gzip.decompress(raw)
    # errors='replace' keeps one stray byte from discarding the whole page.
    html = raw.decode(charset, errors='replace')

    soup = BeautifulSoup(html, "html.parser")

    title = soup.find('h1', class_='article-title')
    title_text = title.text.strip() if title else "No title found"

    article = soup.find('div', class_="article-body")
    content = article.text.strip() if article else "No content found"

    # Print results
    print(f"Title: {title_text}")
    print(f"Content: {content}")

except Exception as e:
    print(f"Error: {str(e)}")
    # If parsing got far enough to build the tree, list the CSS classes that
    # are present to help pick new selectors.
    if 'soup' in locals():
        print("\nAvailable classes in HTML:")
        for tag in soup.find_all(class_=True):
            print(f"Tag: {tag.name}, Class: {tag['class']}")
|
61
scraping/findText/taisounds.py
Normal file
61
scraping/findText/taisounds.py
Normal file
|
@ -0,0 +1,61 @@
|
|||
import re
|
||||
from urllib.request import urlopen, Request
|
||||
import chardet
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
import psycopg2
|
||||
import pandas as pd
|
||||
import dotenv
|
||||
import os
|
||||
import gzip
|
||||
import io
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# Request headers that imitate a desktop browser.
headers = {
    #'User-Agent': 'NewsSceraperBot/1.0 (https://github.com/hpware/news-analyze)',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': '*',
    'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    # Advertise only the encoding this script can decode. Offering
    # "deflate"/"br" lets the server send a body that would be passed
    # undecompressed to .decode(), which fails with
    # "'utf-8' codec can't decode byte ...".
    'Accept-Encoding': 'gzip',
    'Connection': 'keep-alive',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Cache-Control': 'max-age=0',
}

url = "https://www.taisounds.com/news/content/84/189872"

try:
    req = Request(url, headers=headers)
    # Context manager closes the connection; timeout avoids hanging forever.
    with urlopen(req, timeout=30) as response:
        raw = response.read()
        content_encoding = (response.headers.get('Content-Encoding') or '').lower()
        # Prefer the charset declared by the server, falling back to UTF-8.
        charset = response.headers.get_content_charset() or 'utf-8'

    if content_encoding == 'gzip':
        raw = gzip.decompress(raw)
    # errors='replace' keeps one stray byte from discarding the whole page.
    html = raw.decode(charset, errors='replace')

    soup = BeautifulSoup(html, "html.parser")

    title = soup.find('h1')
    title_text = title.text.strip() if title else "No title found"

    #author = soup.find('div', class_='publish')
    #author_text = author.text.strip().soup.find('a').text.strip() if author else "No author found"

    article = soup.find('div', class_='news-box-text')
    content = article.text.strip() if article else "No content found"

    # Print results
    print(f"Title: {title_text}")
    #print(f"Author: {author_text}")
    print(f"Content: {content}")

except Exception as e:
    print(f"Error: {str(e)}")
    # If parsing got far enough to build the tree, list the CSS classes that
    # are present to help pick new selectors.
    if 'soup' in locals():
        print("\nAvailable classes in HTML:")
        for tag in soup.find_all(class_=True):
            print(f"Tag: {tag.name}, Class: {tag['class']}")
|
57
scraping/findText/tvbs.py
Normal file
57
scraping/findText/tvbs.py
Normal file
|
@ -0,0 +1,57 @@
|
|||
# BROKEN
|
||||
import re
|
||||
from urllib.request import urlopen, Request
|
||||
import chardet
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
import psycopg2
|
||||
import pandas as pd
|
||||
import dotenv
|
||||
import os
|
||||
import gzip
|
||||
import io
|
||||
|
||||
# Load environment variables from .env file
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# Request headers that imitate a desktop browser.
headers = {
    #'User-Agent': 'NewsSceraperBot/1.0 (https://github.com/hpware/news-analyze)',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': '*',
    'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    # Advertise only the encoding this script can decode. Offering
    # "deflate"/"br" lets the server send a body that would be passed
    # undecompressed to .decode(), which fails with
    # "'utf-8' codec can't decode byte ...".
    'Accept-Encoding': 'gzip',
    'Connection': 'keep-alive',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Cache-Control': 'max-age=0',
}

url = "https://news.tvbs.com.tw/politics/2866915"

try:
    req = Request(url, headers=headers)
    # Context manager closes the connection; timeout avoids hanging forever.
    with urlopen(req, timeout=30) as response:
        raw = response.read()
        content_encoding = (response.headers.get('Content-Encoding') or '').lower()
        # Prefer the charset declared by the server, falling back to UTF-8.
        charset = response.headers.get_content_charset() or 'utf-8'

    if content_encoding == 'gzip':
        raw = gzip.decompress(raw)
    # errors='replace' keeps one stray byte from discarding the whole page.
    html = raw.decode(charset, errors='replace')

    soup = BeautifulSoup(html, "html.parser")

    # Extract content
    title = soup.find('h1', class_='title')
    title_text = title.text.strip() if title else "No title found"

    article = soup.find('div', class_="article_content")
    paragraph = article.text.strip() if article else ""

    # Print results
    print(f"Title: {title_text}")
    print(f"Content: {paragraph}")

except Exception as e:
    print(f"Error: {str(e)}")
|
57
scraping/findText/twreporter.py
Normal file
57
scraping/findText/twreporter.py
Normal file
|
@ -0,0 +1,57 @@
|
|||
# BROKEN
|
||||
import re
|
||||
from urllib.request import urlopen, Request
|
||||
import chardet
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
import psycopg2
|
||||
import pandas as pd
|
||||
import dotenv
|
||||
import os
|
||||
import gzip
|
||||
import io
|
||||
|
||||
# Load environment variables from .env file
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# Request headers that imitate a desktop browser.
headers = {
    #'User-Agent': 'NewsSceraperBot/1.0 (https://github.com/hpware/news-analyze)',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': '*',
    'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    # Advertise only the encoding this script can decode. Offering
    # "deflate"/"br" lets the server send a body that would be passed
    # undecompressed to .decode(), which fails with
    # "'utf-8' codec can't decode byte ...".
    'Accept-Encoding': 'gzip',
    'Connection': 'keep-alive',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Cache-Control': 'max-age=0',
}

url = "https://www.twreporter.org/a/olena-yagupova-kidnapped-by-russian-soldiers"

try:
    req = Request(url, headers=headers)
    # Context manager closes the connection; timeout avoids hanging forever.
    with urlopen(req, timeout=30) as response:
        raw = response.read()
        content_encoding = (response.headers.get('Content-Encoding') or '').lower()
        # Prefer the charset declared by the server, falling back to UTF-8.
        charset = response.headers.get_content_charset() or 'utf-8'

    if content_encoding == 'gzip':
        raw = gzip.decompress(raw)
    # errors='replace' keeps one stray byte from discarding the whole page.
    html = raw.decode(charset, errors='replace')

    soup = BeautifulSoup(html, "html.parser")

    # Extract content.
    # The site's class names carry a generated suffix after the stable prefix,
    # so match on the prefix with a compiled regex. A plain string passed to
    # class_ is compared as an exact class name and would never match.
    title = soup.find('div', class_=re.compile(r'headline__DefaultContainer'))
    title_text = title.text.strip() if title else "No title found"

    article = soup.find('div', class_=re.compile(r'article-page__ContentBlock'))
    paragraph = article.text.strip() if article else ""

    # Print results
    print(f"Title: {title_text}")
    print(f"Content: {paragraph}")

except Exception as e:
    print(f"Error: {str(e)}")
|
Loading…
Add table
Add a link
Reference in a new issue