<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' xmlns:blogger='http://schemas.google.com/blogger/2008' xmlns:georss='http://www.georss.org/georss' xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr='http://purl.org/syndication/thread/1.0'><id>tag:blogger.com,1999:blog-7583720</id><updated>2026-04-10T02:27:18.299-07:00</updated><category term="java"/><category term="python"/><category term="scala"/><category term="nlp"/><category term="data-mining"/><category term="search"/><category term="lucene"/><category term="spring"/><category term="information-retrieval"/><category term="deep-learning"/><category term="machine-learning"/><category term="general"/><category term="solr"/><category term="algorithms"/><category term="scripting"/><category term="graph"/><category term="hadoop"/><category term="web-development"/><category term="classification"/><category term="databases"/><category term="xml"/><category term="clustering"/><category term="distributed-computing"/><category term="nutch"/><category term="uima"/><category term="json"/><category term="keras"/><category term="actor"/><category term="cms"/><category term="crawler"/><category term="indexing"/><category term="nltk"/><category term="parser"/><category term="image-processing"/><category term="spark"/><category term="web-service"/><category term="alfresco"/><category term="neo4j"/><category term="recommender"/><category term="linux"/><category term="mahout"/><category term="pytorch"/><category term="similarity"/><category term="statistics"/><category term="tensorflow"/><category term="concept-mapping"/><category term="concurrent"/><category term="data-structure"/><category term="map-reduce"/><category term="nosql"/><category term="patterns"/><category term="pos-tagging"/><category term="workflow"/><category term="annotations"/><category term="cassandra"/><category term="drupal"/><category term="eclipse"/><category term="large-language-models"/><category term="mvc"/><category term="named-entity-recognition"/><category term="ontology"/><category term="php"/><category term="probability"/><category term="rome"/><category term="roo"/><category term="rss"/><category term="rules"/><category term="semantic-web"/><category term="unix"/><category term="caching"/><category term="data-management"/><category term="facets"/><category term="javascript"/><category term="jmx"/><category term="math"/><category term="matrix"/><category term="maven"/><category term="multithreading"/><category term="remoting"/><category term="transformers"/><category term="word-embeddings"/><category term="xmlrpc"/><category term="ajax"/><category term="biomedical-informatics"/><category term="cloud"/><category term="generative-ai"/><category term="hidden-markov-model"/><category term="jackrabbit"/><category term="jdbc"/><category term="jfreechart"/><category term="linear-algebra"/><category term="lingpipe"/><category term="parallel"/><category term="performance"/><category term="question-answering"/><category term="ror"/><category term="taxonomy"/><category term="weka"/><category term="awk"/><category term="content-management"/><category term="cosine-similarity"/><category term="data-science"/><category term="debugging"/><category term="elasticsearch"/><category term="etl"/><category term="events"/><category term="functors"/><category term="ir"/><category term="jcr"/><category term="jdom"/><category term="jetty"/><category term="jms"/><category term="junit"/><category term="jython"/><category term="kilim"/><category term="knowledge-graph"/><category term="lisp"/><category term="lucli"/><category term="mac-os-x"/><category term="maven2"/><category term="mysql"/><category term="naive-bayes"/><category term="nearest-neighbor"/><category term="opennlp"/><category term="oracle"/><category term="owl"/><category term="prolog"/><category term="r-language"/><category term="rome-custom-modules"/><category term="ruby-on-rails"/><category term="scalding"/><category term="security"/><category term="spacy"/><category term="windows"/><category term="PyLucene"/><category term="acegi"/><category term="amazon-web-services"/><category term="ant"/><category term="berkeley-db"/><category term="bitsets"/><category term="cascading"/><category term="cherrypy"/><category term="conditional-probability"/><category term="conference"/><category term="development"/><category term="dimensionality-reduction"/><category term="django"/><category term="fedora"/><category term="feeds"/><category term="gdata"/><category term="gensim"/><category term="gnuplot"/><category term="hbase"/><category term="hibernate"/><category term="html"/><category term="httpclient"/><category term="imap"/><category term="jetlang"/><category term="jgrapht"/><category term="jpa"/><category term="k-means"/><category term="luke"/><category term="maven-plugin"/><category term="messaging"/><category term="mongodb"/><category term="object-oriented-design"/><category term="open-source"/><category term="osworkflow"/><category term="pca"/><category term="profiling"/><category term="prototype"/><category term="pylons"/><category term="pymc3"/><category term="question-generation"/><category term="rest"/><category term="retrieval-augmented-generation"/><category term="rsync"/><category term="scikit-learn"/><category term="singular-value-decomposition"/><category term="software-engineering"/><category term="spell-checking"/><category term="springmodules-jcr"/><category term="time-series"/><category term="unit-testing"/><category term="urlrewrite-filter"/><category term="vector-search"/><category term="velocity"/><category term="web"/><category term="xpath"/><category term=".net"/><category term="actorfoundry"/><category term="actorsguild"/><category term="apache-cxf"/><category term="aspectj"/><category term="atom"/><category term="bitfauna"/><category term="blogger"/><category term="burlap"/><category term="c#"/><category term="clisp"/><category term="clojure"/><category term="collections15"/><category term="collocation"/><category term="commons-beanutils"/><category term="commons-collections"/><category term="commons-digester"/><category term="commons-httpclient"/><category term="commons-math"/><category term="corenlp"/><category term="crowdsourcing"/><category term="cusp"/><category term="cx_oracle"/><category term="dask"/><category term="data-analysis"/><category term="dbunit"/><category term="decorator-pattern"/><category term="dijkstra"/><category term="documentation"/><category term="dojo"/><category term="dwr"/><category term="dynabean"/><category term="ec2"/><category term="edb"/><category term="emacs"/><category term="embedded-databases"/><category term="enterprisedb"/><category term="evaluation"/><category term="eventbus"/><category term="garbage-collection"/><category term="generics"/><category term="genetic-algorithms"/><category term="google-web-toolkit"/><category term="graph-learning"/><category term="gwt"/><category term="hessian"/><category term="huggingface"/><category term="inverse-document-frequency"/><category term="inversion-of-control"/><category term="iot"/><category term="j2ee"/><category term="jaccard-similarity"/><category term="jahmm"/><category term="jama"/><category term="jatha"/><category term="javaconfig"/><category term="javamail"/><category term="javaspaces"/><category term="jax"/><category term="jazzy"/><category term="jdee"/><category term="jlisp"/><category term="journaling"/><category term="jsp"/><category term="jvm"/><category term="jwi"/><category term="kettle"/><category term="language-processing"/><category term="larvalabs"/><category term="latent-semantic-indexing"/><category term="lighttpd"/><category term="log-analysis"/><category term="ltr"/><category term="matplotlib"/><category term="mockobjects"/><category term="mojo"/><category term="mono"/><category term="mule"/><category term="named-entity-linking"/><category term="object-persistence"/><category term="pandas"/><category term="parser-toolkit"/><category term="pelops"/><category term="persistence"/><category term="personalization"/><category term="petri-net"/><category term="postgresql"/><category term="prevayler"/><category term="principal-component-analysis"/><category term="probabilistic-programming"/><category term="prompt-engineering"/><category term="pygments"/><category term="qdox"/><category term="qt"/><category term="rcs"/><category term="redhat"/><category term="relevance"/><category term="robert-sedgewick"/><category term="sentence-transformers"/><category term="sequence-learning"/><category term="shortest-path"/><category term="simulated-annealing"/><category term="smtp"/><category term="snorkel"/><category term="soap"/><category term="spark-nlp"/><category term="spatial"/><category term="springmodules-lucene"/><category term="squid"/><category term="stax"/><category term="summarization"/><category term="tag-cloud"/><category term="tagging"/><category term="tapestry"/><category term="tf-idf"/><category term="tiles"/><category term="transactions"/><category term="trees"/><category term="turbogears"/><category term="ubuntu"/><category term="unison"/><category term="url-rewriting"/><category term="version-control"/><category term="vision-models"/><category term="weak-supervision"/><category term="webapp-security"/><category term="webwork"/><category term="xfire"/><title type='text'>Salmon Run</title><subtitle type='html'>Swimming upstream on the technology tide, one technology at a time. A collection of articles, tips, and random musings on application development and system design.</subtitle><link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/posts/default'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default?redirect=false'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/'/><link rel='hub' href='http://pubsubhubbub.appspot.com/'/><link rel='next' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default?start-index=26&amp;max-results=25&amp;redirect=false'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><generator version='7.00' uri='http://www.blogger.com'>Blogger</generator><openSearch:totalResults>493</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><entry><id>tag:blogger.com,1999:blog-7583720.post-5766091247983370548</id><published>2026-02-01T19:23:00.000-08:00</published><updated>2026-02-01T19:23:43.627-08:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="data-science"/><category scheme="http://www.blogger.com/atom/ns#" term="python"/><category scheme="http://www.blogger.com/atom/ns#" term="software-engineering"/><title type='text'>Book Review: Software Engineering for Data Scientists</title><summary type="text">As a Software Engineer (backend Web Development then Search) turned Data Scientist, I was particularly interested in what the book Software Engineering for Data Scientists by Andrew Treadway had to say about the reverse transition. Transitioning between sub-disciplines is a given in our industry -- I started life as a sales/support engineer, then moved to application programming, then back and </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/5766091247983370548/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/5766091247983370548' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/5766091247983370548'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/5766091247983370548'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2026/02/book-review-software-engineering-for.html' title='Book Review: Software Engineering for Data Scientists'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-2407699836506847262</id><published>2026-01-10T15:00:00.000-08:00</published><updated>2026-01-10T15:00:19.380-08:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="transformers"/><title type='text'>Book Review: Transformers In Action</title><summary type="text">The Attention Is All You Need paper proposed the Transformer Architecrture as an improvement to the dominant encoder-decoder models of the time (both recurrent and convolutional). These models used an attention mechanism to connect the encoder and decoder parts, but the Transformer Architecture flipped the script, putting the Attention Mechanism at the center. An early implementation of the </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/2407699836506847262/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/2407699836506847262' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/2407699836506847262'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/2407699836506847262'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2026/01/book-review-transformers-in-action.html' title='Book Review: Transformers In Action'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-1413917009070635405</id><published>2025-12-26T15:46:00.000-08:00</published><updated>2026-01-09T12:04:34.993-08:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="data-science"/><category scheme="http://www.blogger.com/atom/ns#" term="python"/><title type='text'>Trip Report: PyData Global 2025</title><summary type="text">I attended PyData Global 2025 earlier this month. I had hoped to write this up earlier, but I&#39;ve been busy, so only now getting the time Christmas morning. Merry Christmas to all my readers and best wishes for a Happy New 2026, hopefully it will be even better and more exciting (on the technology front) than this one! Taking stock of this year earlier today, I think I have some serious catching </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/1413917009070635405/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/1413917009070635405' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/1413917009070635405'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/1413917009070635405'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2025/12/trip-report-pydata-global-2025.html' title='Trip Report: PyData Global 2025'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-3119510747610131509</id><published>2025-10-12T12:04:00.000-07:00</published><updated>2025-10-12T12:04:37.823-07:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="python"/><category scheme="http://www.blogger.com/atom/ns#" term="time-series"/><title type='text'>Book Review: Time Series Forecasting using Foundation Models</title><summary type="text">As someone who primarily works in NLP and Search in the Health Domain, I don&#39;t have much use for Time Series. However, while exploring the Financial domain based on personal interest, I have been curious about Time Series for some time. Recently I attended the OpenHPI course Time Series Analysis taught by Mario Tormo Romero (even did the quizzes and the certificate of completion!). I was familiar</summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/3119510747610131509/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/3119510747610131509' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/3119510747610131509'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/3119510747610131509'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2025/10/book-review-time-series-forecasting.html' title='Book Review: Time Series Forecasting using Foundation Models'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-3625547917329916954</id><published>2025-09-20T07:27:00.000-07:00</published><updated>2025-09-20T07:27:55.617-07:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="statistics"/><title type='text'>Book Review: Statistics every Programmer Needs</title><summary type="text">I recently read Statistics every Programmer Needs by Gary Sutton. I am probably a good target audience for the book since I used to be a software developer that transitioned into data science some 10 years ago, then into machine learning with neural networks and transformers, and more recently, to Generative AI with Large Language Models. During this time, I have read numerous books on statistics</summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/3625547917329916954/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/3625547917329916954' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/3625547917329916954'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/3625547917329916954'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2025/09/book-review-statistics-every-programmer.html' title='Book Review: Statistics every Programmer Needs'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-555974058258313411</id><published>2025-06-28T13:08:00.000-07:00</published><updated>2025-06-28T13:08:35.091-07:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="deep-learning"/><category scheme="http://www.blogger.com/atom/ns#" term="iot"/><category scheme="http://www.blogger.com/atom/ns#" term="machine-learning"/><category scheme="http://www.blogger.com/atom/ns#" term="python"/><category scheme="http://www.blogger.com/atom/ns#" term="time-series"/><title type='text'>Book Review: Hands-On Artificial Intelligence for IoT</title><summary type="text">For those in similar professional circles as I am in, i.e. looking forward into the Generative AI space, yet with one foot pragmatically and firmly stuck in Machine Learning (ML) and Deep Learning (DL) techniques of the (recent, ok, not very distant) past, you will find Dr Amita Kapoor&#39;s recent book Hands-On Artificial Intelligence for IoT: Expert Machine Learning and Deep Learning Techniques for</summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/555974058258313411/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/555974058258313411' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/555974058258313411'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/555974058258313411'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2025/06/book-review-hands-on-artificial.html' title='Book Review: Hands-On Artificial Intelligence for IoT'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-6720469508089089268</id><published>2025-06-15T16:58:00.000-07:00</published><updated>2025-06-15T16:58:34.975-07:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="knowledge-graph"/><category scheme="http://www.blogger.com/atom/ns#" term="large-language-models"/><category scheme="http://www.blogger.com/atom/ns#" term="retrieval-augmented-generation"/><category scheme="http://www.blogger.com/atom/ns#" term="search"/><title type='text'>Book Review: Essential Graph RAG</title><summary type="text">Coming from a background of Knowledge Graph (KG) backed Medical Search, I don&#39;t need to be convinced about the importance of manually curated structured knowledge on the quality of search results. Traditional search is being rapidly replaced with Generative AI using a technique called Retrieval Augmented Generation (RAG), where the pipeline produces an answer summarizing the search results </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/6720469508089089268/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/6720469508089089268' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/6720469508089089268'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/6720469508089089268'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2025/06/book-review-essential-graph-rag.html' title='Book Review: Essential Graph RAG'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-4027855785487586520</id><published>2024-12-31T10:18:00.000-08:00</published><updated>2024-12-31T10:18:59.029-08:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="machine-learning"/><category scheme="http://www.blogger.com/atom/ns#" term="python"/><category scheme="http://www.blogger.com/atom/ns#" term="software-engineering"/><title type='text'>Packaging ML Pipelines from Experiment to Deployment</title><summary type="text">As an ML Engineer, we are generally tasked with solving some business problem with technology. Typically it involves leveraging data assets that your organization already owns or can acquire. Generally, unless it is a very simple problem, there would be more than one ML model involved, maybe different types of models depending on the sub-task, maybe other supporting tools such as a Search Index </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/4027855785487586520/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/4027855785487586520' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/4027855785487586520'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/4027855785487586520'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2024/12/packaging-ml-pipelines-from-experiment.html' title='Packaging ML Pipelines from Experiment to Deployment'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjj-fK2ZMJJOczVNxwEuFVb3Ol98xVizlwWIWfyWTOjrMq0zsFS9nsN1d6HxLXh4CRRZL8Ad9XkqDyGXEL1Eo134vRcDVZyYSKqIBG9Qllbv9aWDYYbvDnKRSxe26ByycPHiaiioTga6JFmYIpj3NAJ0LejPPqamqNdG_jpNUe5WQKMtOY5XemoBw/s72-c/pkg-ml-expts.png" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-5810773948471156097</id><published>2024-12-08T22:17:00.006-08:00</published><updated>2024-12-09T06:24:41.633-08:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="general"/><category scheme="http://www.blogger.com/atom/ns#" term="python"/><title type='text'>Trip Report - PyData Global 2024</title><summary type="text">I attended PyData Global 2024 last week. Its a virtual conference, so I was able to attend it from the comfort of my home, although presentations seem to be scheduled to be maximally convenient, time-wise, for folks in the US East Coast and Western Europe, so some of them were a bit early for me. There were four main tracks -- the General Track, the Data / Data Science Track, the AI / ML track </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/5810773948471156097/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/5810773948471156097' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/5810773948471156097'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/5810773948471156097'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2024/12/trip-report-pydata-global-2024.html' title='Trip Report - PyData Global 2024'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-3351294832206390671</id><published>2024-10-05T18:51:00.000-07:00</published><updated>2024-10-05T18:51:47.532-07:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="generative-ai"/><category scheme="http://www.blogger.com/atom/ns#" term="graph"/><category scheme="http://www.blogger.com/atom/ns#" term="question-answering"/><category scheme="http://www.blogger.com/atom/ns#" term="retrieval-augmented-generation"/><category scheme="http://www.blogger.com/atom/ns#" term="search"/><title type='text'>Using Knowledge Graphs to enhance Retrieval Augmented Generation</title><summary type="text">Retrieval Augmented Generation (RAG) has become a popular approach to harness LLMs for question answering using your own corpus of data. Typically, the context to augment the query that is passed into the Large Language Model (LLM) to generate an answer comes from a database or search index containing your domain data. When it is a search index, the trend is to use Vector search (HNSW ANN based) </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/3351294832206390671/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/3351294832206390671' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/3351294832206390671'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/3351294832206390671'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2024/10/using-knowledge-graphs-to-enhance.html' title='Using Knowledge Graphs to enhance Retrieval Augmented Generation'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgOBCylu4xu6Wu5-WKzWO0Xp58__F7hMkbvG6Y9m0MN4D3lBl5BcJvxiUJkYcNN_71hko9eV0WdbWczL2FHaHPUdI9vnutw2K5auPHs_inUp-o1aODWQ9ujlwgiMyL87c0Hotwly2hyphenhyphenbddiLppVn_vEO02stiQOeWts8HsLRhtoPYAseU96M3qmyA/s72-c/gar-fig.png" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-8297257525824569369</id><published>2024-07-29T17:07:00.000-07:00</published><updated>2024-07-29T17:07:28.914-07:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="generative-ai"/><category scheme="http://www.blogger.com/atom/ns#" term="information-retrieval"/><category scheme="http://www.blogger.com/atom/ns#" term="large-language-models"/><title type='text'>Experiments with Prompt Compression</title><summary type="text">I recently came across Prompt Compression (in the context of Prompt Engineering on Large Language Models) on this short course on Prompt Compression and Query Optimization from DeepLearning.AI. Essentially it involves compressing the prompt text using a trained model to drop non-essential tokens. The resulting prompt is shorter (and in cases of the original context being longer than the LLM&#39;s </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/8297257525824569369/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/8297257525824569369' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/8297257525824569369'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/8297257525824569369'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2024/07/experiments-with-prompt-compression.html' title='Experiments with Prompt Compression'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEj6kHp2gVhCrBRlsjWYVGl48Sb_SQrlhOtd3HJIH6VhKOeTdzvBs9rxoV2VKZebQ8zDn3fnCEPgSxS2FJyZ5hBsAA1nZ9EYzeRJ8jPaWUfYUi5XNL3u4Jls0XQZQlwhqKf1qSvtmH4Jwxc-OiaOsFfm3q7IvpG3nsdGvxr9D1rqm7XFEfHmchs-bA/s72-c/prompt_compression_scatter.png" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-1724681603841217995</id><published>2024-06-30T23:04:00.000-07:00</published><updated>2024-06-30T23:04:13.558-07:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="large-language-models"/><category scheme="http://www.blogger.com/atom/ns#" term="vision-models"/><title type='text'>Table Extraction from PDFs using Multimodal (Vision) LLMs</title><summary type="text">Couple of weeks ago a colleague and I participated in an internal hackathon where the task was to come up with an interesting use case using the recent multi-modal Large Language Models (LLMs). Multi-modal LLMs take not only text inputs via their prompt like earlier LLMs, but can also accept non-text modalities such as images and audio. Some examples of multi-modal LLMs are GPT-4o from OpenAI, </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/1724681603841217995/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/1724681603841217995' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/1724681603841217995'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/1724681603841217995'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2024/06/table-extraction-from-pdfs-using.html' title='Table Extraction from PDFs using Multimodal (Vision) LLMs'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjBvkORdmKBzNtFTb3xSCKoPEneCrTf1N47D1eQla-vNQBqy4zQSWX1sZGZ12H_TBXwBteyuRnh0LlLZ57JN2GNghGRZ6sAItdm8NGiiMvylwrlPMYecHH7AHnZfzWi_CbHZpMavDIPq_Dt0dhKgTvQVsUT13Tv0yCA3GjZVRTnq3HaGwwvghY5ew/s72-c/aghackathon-blog.png" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-2292899831695767070</id><published>2024-06-23T22:10:00.000-07:00</published><updated>2024-06-23T22:10:00.872-07:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="data-analysis"/><category scheme="http://www.blogger.com/atom/ns#" term="data-management"/><category scheme="http://www.blogger.com/atom/ns#" term="data-science"/><category scheme="http://www.blogger.com/atom/ns#" term="pandas"/><category scheme="http://www.blogger.com/atom/ns#" term="python"/><title type='text'>Book Report: Pandas Workout</title><summary type="text">Unlike many Data Scientists, I didn&#39;t automatically reach for Pandas when I needed to analyze data. I came upon this discipline (Data Science) as a Java Software Engineer who used Python for scripting, so I was quite comfortable operating on JSON / CSV / text files directly, loading data into relational databases and running SQL against them, and building visualizations with Matplotlib. So when </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/2292899831695767070/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/2292899831695767070' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/2292899831695767070'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/2292899831695767070'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2024/06/book-report-pandas-workout.html' title='Book Report: Pandas Workout'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-2003044845915300861</id><published>2024-05-18T06:56:00.000-07:00</published><updated>2024-05-18T07:21:45.074-07:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="evaluation"/><category scheme="http://www.blogger.com/atom/ns#" term="generative-ai"/><category scheme="http://www.blogger.com/atom/ns#" term="information-retrieval"/><category scheme="http://www.blogger.com/atom/ns#" term="large-language-models"/><category scheme="http://www.blogger.com/atom/ns#" term="python"/><title type='text'>Finetuning RAGAS Metrics using DSPy</title><summary type="text">Last month, I decided to sign-up for the Google AI Hackathon, where Google provided access to their Gemini Large Language Model (LLM) and tasked participants with building a creative application on top of it. I have worked with Anthropic&#39;s Claude and OpenAI&#39;s GPT-3 at work previously, and I was curious to see how Gemini stacked up against them. I was joined in that effort by David Campbell and </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/2003044845915300861/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/2003044845915300861' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/2003044845915300861'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/2003044845915300861'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2024/05/finetuning-ragas-metrics-using-dspy.html' title='Finetuning RAGAS Metrics using DSPy'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgf3xcdvyPhBRcKeO10BcpbkbTsUzP46Uv2_RWM94qoONlBpcyfNNlMPPzkSmUmjDauXpL7VrXEVxGtGHFm1AbrRedyugH58SOxzZvfCP2kOlgId6HLLyxmsU80ZcHCFOl4mwP3uBVjhxrFtNR62BcJ0MU7Oz00-9o6cj6k1JkMTUhWtJqMCgrunA/s72-c/lcel-dspy-eval.png" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-2267706794105371634</id><published>2024-05-14T18:31:00.000-07:00</published><updated>2024-05-16T07:32:14.220-07:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="search"/><category scheme="http://www.blogger.com/atom/ns#" term="vector-search"/><title type='text'>Performance Analysis of Float vs Byte vs Binary Vectors on OpenSearch</title><summary type="text">I&#39;ve been working on an application where, given an input string, the objective is to recommend an output string that is similar to the input string, for some notion of similarity. A machine learning model, in this case a SentenceTransformers model, is taught this notion of similarity by showing it many examples of input-output pairs. The model&#39;s weights are then used to encode the part to be </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/2267706794105371634/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/2267706794105371634' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/2267706794105371634'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/2267706794105371634'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2024/05/performance-analysis-of-float-vs-byte.html' title='Performance Analysis of Float vs Byte vs Binary Vectors on OpenSearch'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgFOfQa_QgIJ-rSxssE8zToFIeJw_xQhPe4OVxRa_mZ6hrb32WXBMQRX8l9E4cKaj3BJ80nP9zIUQ8Jc5rvJNXSVqQKGme3tmqm57TvEv2We_VfmUOUKX2QAqM7CIOHo4TKCuock3URMcY-778PBTaJYjq-3eWC1aAHMK7lW85lvHZQBmE_0d2wIg/s72-c/perf_chart_mrr_recall.png" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-7969812542594217370</id><published>2024-05-07T16:59:00.000-07:00</published><updated>2024-05-09T10:46:50.146-07:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="conference"/><category scheme="http://www.blogger.com/atom/ns#" term="knowledge-graph"/><title type='text'>KGC/HCLS 2024 Trip Report</title><summary type="text">I was at KGC (Knowledge Graph Conference) 2024, which is happening May 6-10 at Cornell Tech. I was presenting (virtually) at their Health Care and Life Sciences (HCLS) workshop, so my speakers pass was only valid for today for the HCLS portion of KGC. My trip report covers a few talks that I attended here. Attending virtually was a bit chaotic as sessions went over sometimes, so you might leave a</summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/7969812542594217370/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/7969812542594217370' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/7969812542594217370'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/7969812542594217370'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2024/05/kgchcls-2024-trip-report.html' title='KGC/HCLS 2024 Trip Report'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjF8O2t3xfChhNAmgqGjCm_zarX8FdfuwHJ04EPSOyWqWXJv_Wiw2qm-i-_CTOBxsW8I1q28lVUaS29jHnk1Vp2O5jXkhgNb0h1eLpsPOgpPQCGybGD3e_pyuEvP_JaoHRa3pAlsoxkmItA4y9Ws608OPqyyXEP6HWNBC-6ewu2eLO2kvKZxBtCEA/s72-c/Picture1.png" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-8966343464351206346</id><published>2024-03-23T16:11:00.000-07:00</published><updated>2024-03-23T17:11:51.744-07:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="biomedical-informatics"/><category scheme="http://www.blogger.com/atom/ns#" term="general"/><category scheme="http://www.blogger.com/atom/ns#" term="machine-learning"/><title type='text'>Book Report: Machine Learning for Drug Discovery</title><summary type="text">Drug Discovery is a field where biochemists (and more recently computer scientists) turn ideas into potential medications. I first came across a few applications in this area when checking out how to build Graph Neural Networks (GNN) as part of auditing the CS224W: Machine Learning with Graphs course from Stanford, some learnings of which I recycled into my Deep Learning with Graphs tutorial at </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/8966343464351206346/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/8966343464351206346' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/8966343464351206346'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/8966343464351206346'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2024/03/book-report-machine-learning-for-drug.html' title='Book Report: Machine Learning for Drug Discovery'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-2444654388244073272</id><published>2024-03-17T13:26:00.000-07:00</published><updated>2024-03-17T13:39:12.022-07:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="generative-ai"/><category scheme="http://www.blogger.com/atom/ns#" term="large-language-models"/><category scheme="http://www.blogger.com/atom/ns#" term="python"/><category scheme="http://www.blogger.com/atom/ns#" term="summarization"/><title type='text'>Hierarchical (and other) Indexes using LlamaIndex for RAG Content Enrichment</title><summary type="text">At our weekly This Week in Machine Learning (TWIML) meetings, (our leader and facilitataor) Darin Plutchok pointed out a LinkedIn blog post on Semantic Chunking that has been recently implemented in the LangChain framework. Unlike more traditional chunking approaches that use number of tokens or separator tokens as a guide, this one chunks groups of sentences into semantic units by breaking them </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/2444654388244073272/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/2444654388244073272' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/2444654388244073272'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/2444654388244073272'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2024/03/hierarchical-and-other-indexes-using.html' title='Hierarchical (and other) Indexes using LlamaIndex for RAG Content Enrichment'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhbn8J7Ek2JpyIU0W0031059-CwntE6od101TvRW7uKQoevRMpnp5Og8cFpkYUwVoZahbXDrWL33mLyJLXd3x5WeT-p_wEg8t8_X5w3HsGU9RLnWDbPMqOD0Nk5Ntgc_JcyHhg6MDFE2MQGPB8n4TdEOlj0zEX0aD51hVQQdUpqzUA89O9swTTeag/s72-c/raptor_gmm_90.png" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-13596624551983867</id><published>2024-02-24T17:31:00.000-08:00</published><updated>2024-02-24T17:38:24.922-08:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="large-language-models"/><category scheme="http://www.blogger.com/atom/ns#" term="prompt-engineering"/><category scheme="http://www.blogger.com/atom/ns#" term="question-answering"/><category scheme="http://www.blogger.com/atom/ns#" term="question-generation"/><title type='text'>Thoughts on using LangChain LCEL with Claude</title><summary type="text">I got into Natural Language Processing (NLP) and Machine Learning (ML) through Search. And this led me into Generative AI (GenAI), which led me back to Search via Retrieval Augmented Generation (RAG). RAG started out relatively simple -- take a query, generate search results, use search results as context for a Large Language Model (LLM) to generate an abstractive summary of the results. Back </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/13596624551983867/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/13596624551983867' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/13596624551983867'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/13596624551983867'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2024/02/thoughts-on-using-langchain-lcel-with.html' title='Thoughts on using LangChain LCEL with Claude'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhBA2yw4xJnvCZQsfbasY49ZdfhrHp79JCW76YqVLNvombNnmBIVWyNvRwg70sjNSPL1qMQYQRZ_3OAC4KwHB9w9LL1gmGdsoUCTdtABdJxRvS9gX_1kka8XpvXJg65X8LDDAZjApH9cbnRy2OWw97Ue1X3rqYlZGdWZrwlm-j1C9gFb85w84eW_Q/s72-c/qgen-eval-chain.drawio.png" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-6145352586582115069</id><published>2024-02-03T15:40:00.000-08:00</published><updated>2024-02-04T08:32:38.449-08:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="statistics"/><title type='text'>Book Report: Allen B Downey&#39;s Probably Overthinking It</title><summary type="text">I have read Allen Downey&#39;s books on statistics in the past, when trying to turn myself from a Software Engineer into what Josh Wills says a Data Scientist is -- someone who is better at statistics than a Software Engineer and better at software than a statistician (with somewhat limited success in the first area, I will hasten to add). Last year, I had the good fortune to present at PyData Global</summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/6145352586582115069/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/6145352586582115069' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/6145352586582115069'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/6145352586582115069'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2024/02/book-report-allen-b-downeys-probably.html' title='Book Report: Allen B Downey&#39;s Probably Overthinking It'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-1026445863886131169</id><published>2024-01-01T12:53:00.000-08:00</published><updated>2024-01-01T12:57:25.173-08:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="knowledge-graph"/><category scheme="http://www.blogger.com/atom/ns#" term="named-entity-linking"/><category scheme="http://www.blogger.com/atom/ns#" term="named-entity-recognition"/><category scheme="http://www.blogger.com/atom/ns#" term="nlp"/><category scheme="http://www.blogger.com/atom/ns#" term="sentence-transformers"/><category scheme="http://www.blogger.com/atom/ns#" term="transformers"/><title type='text'>Knowledge Graph Aligned Entity Linker using SentenceTransformers</title><summary type="text">Most of us are familiar with Named Entity Recognizers (NERs) that can recognize spans in text as belonging to a small number of classes, such as Person (PER), Organization (ORG), Location (LOC), etc. These are usually multi-class classifier models, trained on input sequences to return BIO (Begin-Input-Output) tags for each token. However, recognizing entities in a Knowledge Graph (KG) using this </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/1026445863886131169/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/1026445863886131169' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/1026445863886131169'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/1026445863886131169'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2024/01/knowledge-graph-aligned-entity-linker.html' title='Knowledge Graph Aligned Entity Linker using SentenceTransformers'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhoZYw8jglKrqKqb7e7kiDW3iyd7LhyKulkWvoHGHTJV2b4a_zln1iq73R-z_sqc-B3NGT3FyKyw_LgHCewE4i8vBadJtJofmjX3ayDmWvG2F7RaDvwFqOh_CNakdGZCuZUpph18Wuu4OlA3U2nmsOPl27QVHrmjaPpuhCcJBMVhlDb8kAP3aGwEg/s72-c/dist-kgnel-bmbert-mnr.png" height="72" width="72"/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-5896316554942919450</id><published>2023-12-09T12:38:00.000-08:00</published><updated>2023-12-09T12:38:28.455-08:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="general"/><category scheme="http://www.blogger.com/atom/ns#" term="python"/><title type='text'>PyData Global 2023: Trip Report</title><summary type="text">I had the opportunity to present at PyData Global this year. It is a virtual conference that ran over 3 days in multiple tracks from December 6 to 8. I talked about Building Learning to Rank models for search using Large Language Models. For those attending the conference, I already shared the links to the slides and the associated code on its Discord channel, but for those who are not, they are </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/5896316554942919450/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/5896316554942919450' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/5896316554942919450'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/5896316554942919450'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2023/12/pydata-global-2023-trip-report.html' title='PyData Global 2023: Trip Report'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-9162304331871008979</id><published>2023-12-03T13:50:00.000-08:00</published><updated>2023-12-03T13:50:43.224-08:00</updated><title type='text'>Building Learning to Rank Models with Generative AI</title><summary type="text">Generative AI has been the new cool kid on the AI / ML block since early this year. Like everyone else, I continue to be amazed and wowed with each successive success story as they break existing benchmark records and showcase novel applications built on top of their new functionality. I was also lucky to be involved in a Generative AI project since the middle of this year, which gave me access </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/9162304331871008979/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/9162304331871008979' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/9162304331871008979'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/9162304331871008979'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2023/12/building-learning-to-rank-models-with.html' title='Building Learning to Rank Models with Generative AI'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-6305567007053567601</id><published>2023-10-07T09:57:00.002-07:00</published><updated>2023-12-11T08:49:01.363-08:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="machine-learning"/><category scheme="http://www.blogger.com/atom/ns#" term="python"/><category scheme="http://www.blogger.com/atom/ns#" term="spark"/><title type='text'>A PySpark idiom for efficient Model Inference</title><summary type="text">I recently needed to build an Apache Spark (PySpark) job where the task was (among other things) to use a Language Model (LM) to encode text into vectors. This is an embarassingly parallel job where the text to encoding is one to one, so something like Spark works very well here. We could, in theory at least, achieve a N-fold performance improvement by horizontally partitioning the data into N </summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/6305567007053567601/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/6305567007053567601' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/6305567007053567601'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/6305567007053567601'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2023/10/a-pyspark-idiom-for-efficient-model.html' title='A PySpark idiom for efficient Model Inference'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-7583720.post-3144991686109696486</id><published>2023-06-24T22:17:00.004-07:00</published><updated>2023-06-24T22:17:31.314-07:00</updated><category scheme="http://www.blogger.com/atom/ns#" term="biomedical-informatics"/><category scheme="http://www.blogger.com/atom/ns#" term="image-processing"/><title type='text'>BMI 702 Review Part IV -- Biomedical Imaging</title><summary type="text">Here is Part IV of my ongoing review of the Biomedical Artificial Intelligence (BMI 702) course, part of Harvard&#39;s Foundation of Biomedical Informatics 2023 Spring session, taught by Prof Marinka Zitnik and her team. If you want to check out my previous reviews in this series, they are listed below.


  BMI 702 Review Part I
  BMI 702 Review Part II (Graph Learning)
  BMI 702 Review Part III (</summary><link rel='replies' type='application/atom+xml' href='http://sujitpal.blogspot.com/feeds/3144991686109696486/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment/fullpage/post/7583720/3144991686109696486' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/3144991686109696486'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/7583720/posts/default/3144991686109696486'/><link rel='alternate' type='text/html' href='http://sujitpal.blogspot.com/2023/06/bmi-702-review-part-iv-biomedical.html' title='BMI 702 Review Part IV -- Biomedical Imaging'/><author><name>Sujit Pal</name><uri>http://www.blogger.com/profile/06835223352394332155</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgPKQuNrZyKjCIaE3AFdwZfF0mMrhdKHyUNu9Yp8pC06yaR6iM4V0tsFJ6KL5oG4NQONHz6yKUZ0gqZ9xhyQ0logHD3n0aeBzCFWnJxaGj1Cl0ylj-KPRf8cx94qOlGLVQ/s220/me.png'/></author><thr:total>0</thr:total></entry></feed>