In an effort to learn Python coming from PHP and Java I've been working on a simple script to go through the Steemit blockchain and grab some daily post, comment, vote and flag stats. I was curious to track Steemit's growth.
The script below will go out and fetch blocks for a given time span specified in days. Parse through the block values incrementing some simple counters and then dumping that aggregated information into MySQL after it's done. It could easily be modified to record a lot more information if one were so inclined.
The snippet of code I hope people might actually find useful to reuse is the two functions dealing with retrieving blocks by date. I found figuring out which blocks were which for a given day to not be the easiest task. It appears there are a maximum 28,800 blocks per day which is 1 block every 3 seconds however the network can run slower than that but seemingly never faster so the estimate of 28,800 blocks in 24 hours a ball park figure. The function will cycle through and do a little math to try and find a block for the matching date down to 3 seconds.
If you run the script it will just print out a counter of the % done when it reaches 100 it's over. Feel free to reuse my code and leave any tips or comments for me in the comments
from steemapi.steemnoderpc import SteemNodeRPC
rpc = SteemNodeRPC('ws://node.steem.ws')
from collections import Counter
import csv
import json
import datetime
import sys
import collections
import mysql.connector as mariadb
'''
read in the days we are processing as command line arguements
'''
try:
startDatetimeArg = datetime.datetime.strptime(sys.argv[1], "%Y-%m-%d")
endDatetimeArg = datetime.datetime.strptime(sys.argv[2], "%Y-%m-%d")
if startDatetimeArg > endDatetimeArg:
print('Invalid timespan make sure endday is after startday')
sys.exit()
except:
print('Usage python3 stats.py ')
sys.exit()
def findBlocksByTime(startDatetime, endDatetime):
"Get the head block, guess the blocknum and validate"
calibBlockNum = props['head_block_number']
calibBlockDatetime = datetime.datetime.strptime(rpc.get_block(calibBlockNum)['timestamp'], "%Y-%m-%dT%H:%M:%S")
daysFrom = startDatetime - calibBlockDatetime
startBlockNum = calibBlockNum + (daysFrom.days * 28800)
startBlockNum = validateBlocksByTime(startBlockNum, startDatetime)
daysFrom = endDatetime - startDatetime
endBlockNum = startBlockNum + (daysFrom.days * 28800)
endBlockNum = validateBlocksByTime(endBlockNum, endDatetime)
return [startBlockNum, endBlockNum]
def validateBlocksByTime(guessBlockNum, guessDatetime):
"cycle through until we validate a block num to a time with 3 sec"
block = rpc.get_block_header(guessBlockNum)
blockDatetime = datetime.datetime.strptime(block["timestamp"], "%Y-%m-%dT%H:%M:%S")
timeDiff = guessDatetime - blockDatetime
if abs(timeDiff.total_seconds()) > 3:
newBlockNum = int( guessBlockNum + (timeDiff.total_seconds() / 3 ) )
return validateBlocksByTime(newBlockNum, guessDatetime)
else:
return guessBlockNum
props = rpc.get_dynamic_global_properties()
blockNums = findBlocksByTime(startDatetimeArg, endDatetimeArg)
statsDays = collections.defaultdict(int)
for i in range(blockNums[0], blockNums[1]):
block = rpc.get_block(i)
blockDatetime = datetime.datetime.strptime(block["timestamp"], "%Y-%m-%dT%H:%M:%S")
if blockDatetime < startDatetimeArg:
print("Too early")
elif blockDatetime < endDatetimeArg:
headersOnly = 0
for t in block["transactions"]:
for o in t["operations"]:
if "comment" in o:
comment = o[1]
if comment["title"] == "":
statsDays[(blockDatetime.strftime("%Y-%m-%d"),o[1]["author"],"comment")] += 1
else:
statsDays[(blockDatetime.strftime("%Y-%m-%d"),o[1]["author"],"post")] += 1
elif "vote" in o:
if o[1]["weight"] > 0:
statsDays[(blockDatetime.strftime("%Y-%m-%d"),o[1]["voter"],"vote")] += 1
else:
statsDays[(blockDatetime.strftime("%Y-%m-%d"),o[1]["voter"],"flag")] += 1
percentDone = 100 - ((blockNums[1]- i) / (blockNums[1] - blockNums[0]) * 100)
print(round(percentDone,2))
mariadb_connection = mariadb.connect(user='user', password='34fStOpL00K4pAszF7Z', database='database')
cursor = mariadb_connection.cursor()
for k, v in statsDays.items():
dt = datetime.datetime.strptime(k[0], "%Y-%m-%d")
if "vote" in k:
cursor.execute("INSERT INTO votes (dt, voter, is_flag, count) VALUES (%s,%s,%s,%s) ON DUPLICATE KEY UPDATE count=%s", (dt,k[1],0,v,v))
elif "flag" in k:
cursor.execute("INSERT INTO votes (dt, voter, is_flag, count) VALUES (%s,%s,%s,%s) ON DUPLICATE KEY UPDATE count=%s", (dt,k[1],1,v,v))
elif "comment" in k:
cursor.execute("INSERT INTO comments (dt, author, is_post, count) VALUES (%s,%s,%s,%s) ON DUPLICATE KEY UPDATE count=%s", (dt,k[1],0,v,v))
elif "post" in k:
cursor.execute("INSERT INTO comments (dt, author, is_post, count) VALUES (%s,%s,%s,%s) ON DUPLICATE KEY UPDATE count=%s", (dt,k[1],1,v,v))
cursor.close()
mariadb_connection.commit()
mariadb_connection.close()
Here is the MySQL schema for the tables
--
-- Table structure for table `votes`
--
CREATE TABLE `votes` (
`dt` datetime NOT NULL,
`voter` varchar(255) NOT NULL,
`is_flag` tinyint(1) NOT NULL,
`count` int(11) DEFAULT NULL,
PRIMARY KEY (`dt`,`voter`,`is_flag`)
);
--
-- Table structure for table `votes`
--
--
-- Table structure for table `comments`
--
CREATE TABLE `comments` (
`dt` datetime NOT NULL,
`author` varchar(255) NOT NULL,
`is_post` tinyint(1) NOT NULL,
`count` int(11) DEFAULT NULL,
PRIMARY KEY (`dt`,`author`,`is_post`)
) ;
You can check out my previous Python learning with a simple Steemit bot here: @contentjunkie/learning-python-by-making-a-steemit-bot