Spaces:

green
/

TopicDig

Runtime error

TopicDig / source.py

m. polinsky

Create source.py

c6e9812 unverified about 4 years ago

2.26 kB

	# source.py provides an abstract dataclass for a data source
	from abc import ABC, abstractmethod
	from dataclasses import dataclass
	from collections import namedtuple
	from typing import List, Optional

	Summary = namedtuple('Summary',['source','cluster_list','link_ext','hed','dek','date','authors','original_length','summary_text','summary_length','chunk_time', 'query_time', 'mean_query_time', 'summary_time'])
	Summary.__doc__ = f"""
	Summary: a namedtuple for storing Summaries and relevant metadata.

	• Source: A Source object for the source of the summarized document.
	• cluster_list: A list of the NER entities detected in this article's hed (headline).
	• link_ext: The link extension of the article (on the base url, source's source_url)
	• hed, dek: headline and subheader. These are standard industry terms.
	Dek is None if not applicable.
	• date: Date of publication/update listed in article.
	• authors: list of authors, currently a string containing the byline.
	• original_length: length of the original article
	• cluster_num: Number of clusters the source article appears in
	• summary_text: List of summarized chunks.
	• summary_length: Length of summary text
	• stats for stats
	"""

	@dataclass
	class Source(ABC):
	source_name: Optional[str] = ""
	source_url: Optional[str] = ""
	# Checkpoint str encourages use of source-appropriate models.
	source_summarization_checkpoint: Optional[str] = ""
	source_ner_checkpoint: Optional[str] = ""

	"""
	User must implement a source-dependent method
	to retrieve data used to create clusters.

	This gets called when clustering is performed.
	"""
	@abstractmethod
	def retrieve_cluster_data(self) -> List[namedtuple]:
	pass

	"""
	User must implement a source-dependent method
	to retrieve texts for summarization.

	This gets called once topics for digestion have been selected.
	"""
	@abstractmethod
	def retrieve_article(self) -> List[namedtuple]:
	pass