Home>

Connecting to PostgreSQL using SQLAlchemy, defining the model and trying to add the text in the folder (Html ​​format) to the database.
When I run the following code, I get the following error message: I'm sorry. Can anyone tell me the cause of the error?
Best regard.


AttributeError
Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/sqlalchemy/orm/session.py in add (self, instance, _warn)
1942 try:
->1943 state = attributes.instance_state (instance)
1944 except exc.NO_STATE:

AttributeError: 'tuple' object has no attribute '_sa_instance_state'

During handling of the above exception, another exception occurred:

UnmappedInstanceError
Traceback (most recent call last)
in
9 print ('scraped:', title)
10 url = 'https://en.wikipedia.org/wiki/{0}'.format(urllib.parse.quote(title))
--->11 session.add ((text, json.dumps ({'url': url, 'title': title})))
12 session.commit ()
13 session.close ()

/opt/conda/lib/python3.7/site-packages/sqlalchemy/orm/session.py in add (self, instance, _warn)
1943 state = attributes.instance_state (instance)
1944 except exc.NO_STATE:
->1945 raise exc.UnmappedInstanceError (instance)
1946
1947 self._save_or_update_state (state)

UnmappedInstanceError: Class 'builtins.tuple' is not mapped

import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
url = URL (drivername = 'postgresql', username = 'postgres', password = '<password>', host = 'localhost', database = 'test')
url
engine = create_engine (url)
engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String
Base = declarative_base ()
class User (Base):
    __tablename__ = 'users'
    id = Column (Integer, primary_key = True)
    content = Column (String)
    meta_info = Column (String)
from sqlalchemy.orm import sessionmaker
session = sessionmaker (bind = engine) ()
import re
import unicodedata
from bs4 import BeautifulSoup
translation_table = str.maketrans (dict (zip ('()!', '()!')))
def cleanse (text):
    text = unicodedata.normalize ('NFKC', text) .translate (translation_table)
    text = re.sub (r '\ s +', '', text)
    return text
def scrape (html):
    soup = BeautifulSoup (html, 'html.parser')
    # Insert __EOS__
    for block in soup.find_all (['br', 'p', 'h1', 'h2', 'h3', 'h4']):
        if len (block.text.strip ())>0 and block.text.strip () [-1] not in ['. ','! ']:
            block.append ('<__ EOS __>')
    # Extract text
    text = '\ n'.join ((cleanse (block.text.strip ())
                      for block in soup.find_all (['p', 'h1', 'h2', 'h3', 'h4'])
                      if len (block.text.strip ())>0])
    # Extract title
    title = cleanse (soup.title.text.replace ('-Wikipedia', ''))
    return text, title
import glob
import urllib.request
import json
users = []
for filename in glob.glob ('./ data/wikipedia/*. html'):
    with open (filename) as fin:
        html = fin.read ()
        text, title = scrape (html)
        print ('scraped:', title)
        url = 'https://en.wikipedia.org/wiki/{0}'.format(urllib.parse.quote(title))
        session.add ((text, json.dumps ({'url': url, 'title': title})))
        session.commit ()
    session.close ()
Code
Trends