Skip to content

Commit d6abe4d

Browse files
committed
reduce complexity
1 parent 856d0b3 commit d6abe4d

File tree

1 file changed

+41
-61
lines changed

1 file changed

+41
-61
lines changed

Apple-Music-Scraper/main.py

Lines changed: 41 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -635,92 +635,75 @@ def video_scrape(
635635
headers = {"User-Agent": "Mozilla/5.0"}
636636

637637
try:
638-
rspn = requests.get(url, headers=headers, timeout=10)
639-
rspn.raise_for_status()
638+
resp = requests.get(url, headers=headers, timeout=10)
639+
resp.raise_for_status()
640640
except Exception:
641641
return result
642642

643-
soup = BeautifulSoup(rspn.text, "html.parser")
644-
tag = soup.find("script", {"id": "serialized-server-data"})
643+
soup = BeautifulSoup(resp.text, "html.parser")
644+
645+
# Load JSON blob
646+
tag = soup.find("script", id="serialized-server-data")
645647
if not tag:
646648
return result
647649

648650
try:
649651
data = json.loads(tag.text)
650652
sections = data[0]["data"]["sections"]
651-
except (KeyError, IndexError, json.JSONDecodeError):
653+
except Exception:
652654
return result
653655

654-
music_video_header = None
655-
more = None
656-
similar = None
656+
# Extract relevant sections
657+
music_video_header = next((s for s in sections if "music-video-header" in s.get("id", "")), None)
658+
more_sec = next((s for s in sections if "more-by-artist" in s.get("id", "")), None)
659+
similar_sec = next((s for s in sections if "more-in-genre" in s.get("id", "")), None)
657660

658-
for sec in sections:
659-
sec_id = sec.get("id", "")
660-
if "music-video-header" in sec_id:
661-
music_video_header = sec
662-
elif "more-by-artist" in sec_id:
663-
more = sec
664-
elif "more-in-genre" in sec_id:
665-
similar = sec
666-
667-
# TITLE
661+
# Parse main item
668662
item = (music_video_header or {}).get("items", [{}])[0]
669663
result["title"] = item.get("title", "")
670664

671-
# IMAGE
672-
try:
673-
artwork = item.get("artwork", {}).get("dictionary", {})
665+
# Artwork
666+
artwork = item.get("artwork", {}).get("dictionary", {})
667+
if artwork:
674668
result["image"] = get_cover(
675669
artwork.get("url", ""),
676670
artwork.get("width", 0),
677-
artwork.get("height", 0),
671+
artwork.get("height", 0)
678672
)
679-
except Exception:
680-
pass
681673

682-
# ARTIST
683-
try:
684-
sl = item.get("subtitleLinks", [])[0]
685-
result["artist"]["title"] = sl.get("title", "")
686-
result["artist"]["url"] = (
687-
sl["segue"]["actionMetrics"]
688-
["data"][0]["fields"]["actionUrl"]
689-
)
690-
except Exception:
691-
pass
692-
693-
# VIDEO URL
694-
try:
695-
json_tag = soup.find(
696-
"script",
697-
{
698-
"id": "schema:music-video",
699-
"type": "application/ld+json"
700-
}
701-
)
702-
schema_data = json.loads(json_tag.string)
703-
result["video-url"] = schema_data["video"]["contentUrl"]
704-
except (AttributeError, KeyError, TypeError, json.JSONDecodeError):
705-
pass
674+
# Artist info
675+
sl = item.get("subtitleLinks", [{}])[0]
676+
result["artist"]["title"] = sl.get("title", "")
677+
result["artist"]["url"] = (
678+
sl.get("segue", {})
679+
.get("actionMetrics", {})
680+
.get("data", [{}])[0]
681+
.get("fields", {})
682+
.get("actionUrl", "")
683+
)
706684

707-
# MORE BY ARTIST
708-
try:
709-
for m in more.get("items", []):
685+
# Video URL (from JSON-LD)
686+
schema_tag = soup.find("script", id="schema:music-video")
687+
if schema_tag:
688+
try:
689+
schema_data = json.loads(schema_tag.string)
690+
result["video-url"] = schema_data.get("video", {}).get("contentUrl", "")
691+
except Exception:
692+
pass
693+
694+
# More by artist
695+
if more_sec:
696+
for m in more_sec.get("items", []):
710697
url = safe_action_url(m)
711698
if url:
712699
result["more"].append(url)
713-
except Exception:
714-
pass
715700

716-
# SIMILAR
717-
try:
718-
for s in similar.get("items", []):
701+
# Similar videos
702+
if similar_sec:
703+
for s in similar_sec.get("items", []):
719704
url = safe_action_url(s)
720705
if url:
721706
result["similar"].append(url)
722-
except Exception:
723-
pass
724707

725708
return result
726709

@@ -955,6 +938,3 @@ def test_all_functions():
955938
print("artist_scrape ERROR:", e)
956939

957940
print("\n=== ALL TESTS COMPLETED ===")
958-
959-
960-
# test_all_functions()

0 commit comments

Comments
 (0)